This script includes all steps in the “main pipeline” part of my thesis project: differential analysis of the reference airway current vs. never smoker dataset (A1, GSE63127), differential expression analysis of the TCGA-LUAD lung adenocarcinoma expression and methylation datasets, and analysis of the reference “persistent” airway current vs. former vs. never smoker dataset (A2, GSE7895). All normalization, quality control, and filtering steps are included.
Note: I will keep all genes with FDR < 0.05 until the filtering and dataset-comparison step.
library(EnhancedVolcano, verbose = FALSE)
## Loading required package: ggplot2
## Loading required package: ggrepel
## Warning: package 'ggrepel' was built under R version 4.3.3
library(GEOquery, verbose = FALSE)
## Loading required package: Biobase
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
library(limma, verbose = FALSE)
##
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
library(umap, verbose = FALSE)
library(dplyr, verbose = FALSE)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Version info: R 4.2.2, Biobase 2.58.0, GEOquery 2.66.0, limma 3.54.0
################################################################
# Differential expression analysis with limma
# load series and platform data from GEO (date: 2024/10/15)
# Download the GSE63127 series matrix (with platform annotation) from GEO.
gset <- getGEO("GSE63127", GSEMatrix = TRUE, AnnotGPL = TRUE)
## Found 1 file(s)
## GSE63127_series_matrix.txt.gz
# If several entries come back, keep the one whose name mentions GPL570;
# otherwise take the only entry.
if (length(gset) > 1) {
  idx <- grep("GPL570", attr(gset, "names"))
} else {
  idx <- 1
}
gset <- gset[[idx]]
# make proper column names to match toptable
fvarLabels(gset) <- make.names(fvarLabels(gset))
# group membership for all samples: one character per sample, in column order
# ("0" = non-smoker, "1" = smoker, "X" = excluded from this analysis)
gsms <- paste0("X00100011111X00000000000X00000X000000000000X000X00",
               "XX00X0XXXXXXXXX1111111111111111111111X111X11111111",
               "XXXX1XXXXXXXXXXXXXXXXXXXXXXXX001000000010100111111",
               "01110111110011111001110011101011111001110101100011",
               "111111111111111111111111111111")
sml <- strsplit(gsms, split = "")[[1]]
# The mask is purely positional, so it must contain exactly one character per
# array. Fail early rather than silently attach labels to the wrong samples
# (e.g. if the GEO series gains or loses samples).
if (length(sml) != ncol(gset)) {
  stop(
    "Sample mask has ", length(sml), " entries but the series has ",
    ncol(gset), " samples",
    call. = FALSE
  )
}
# filter out excluded samples (marked as "X")
sel <- which(sml != "X")
sml <- sml[sel]
gset <- gset[, sel]
gset <- gset[complete.cases(exprs(gset)), ] # drop probes with any missing value
length(sml) # 182 samples
## [1] 182
2024/11/07: I am going to try doing this without the quantile normalization to see whether it’s still okay
## Quality check: inspect the raw value distribution to decide whether a log2
## transform and/or quantile normalization is needed ##
hist(as.matrix(exprs(gset))) # heavily skewed (mass at the low end) -> needs log2 transform
boxplot(exprs(gset)) # per-sample distributions are unreadable on the raw scale
max(exprs(gset)) # 136808
## [1] 136808
min(exprs(gset)) # 0.0657913
## [1] 0.0657913
# The range (~0.07 to ~137k) shows the values are not on a log scale.
## log2 transform (quantile normalization is intentionally disabled, see below) ##
# The +1 offset keeps every transformed value non-negative.
exprs(gset) <- log2(exprs(gset)+1)
# Quantile normalization is left commented out on purpose: the analysis is
# being run without it for now.
##exprs(gset) <- normalizeBetweenArrays(exprs(gset)) # quantile normalization: no longer doing this for now
hist(as.matrix(exprs(gset))) # much better
boxplot(exprs(gset)) # per-sample distributions now look reasonable
# Range after the transform, as a sanity check
min(exprs(gset))
## [1] 0.09192496
max(exprs(gset))
## [1] 17.0618
# Turn the per-sample "0"/"1" codes into a two-level group factor and store it
# on gset so that it travels with the samples.
groups <- make.names(c("non-smoker", "smoker"))
gs <- factor(sml, labels = groups)
gset$group <- gs
# Pull out the per-sample phenotype table. It only contains the samples kept
# above, and it is messy, so the following sections clean it up.
phenotypic_data <- pData(gset)
head(phenotypic_data)
## title geo_accession status
## GSM190150 Small airways, non-smoker 029 GSM190150 Public on Dec 16 2008
## GSM190153 Small airways, non-smoker 036 GSM190153 Public on Jun 17 2008
## GSM254157 small airways, smoker 112 GSM254157 Public on Jun 17 2008
## GSM298223 small airways, non-smoker 050 GSM298223 Public on Jul 13 2009
## GSM298227 small airways, non-smoker 076 GSM298227 Public on Jul 13 2009
## GSM298228 small airways, non-smoker 080 GSM298228 Public on Jul 13 2009
## submission_date last_update_date type channel_count
## GSM190150 May 17 2007 Aug 28 2018 RNA 1
## GSM190153 May 17 2007 Aug 28 2018 RNA 1
## GSM254157 Jan 03 2008 Aug 28 2018 RNA 1
## GSM298223 Jun 13 2008 Nov 12 2009 RNA 1
## GSM298227 Jun 13 2008 Aug 28 2018 RNA 1
## GSM298228 Jun 13 2008 Aug 28 2018 RNA 1
## source_name_ch1
## GSM190150 airway epithelial cells obtained by bronchoscopy and brushing
## GSM190153 airway epithelial cells obtained by bronchoscopy and brushing
## GSM254157 airway epithelial cells obtained by bronchoscopy and brushing
## GSM298223 airway epithelial cells obtained by bronchoscopy and brushing
## GSM298227 airway epithelial cells obtained by bronchoscopy and brushing
## GSM298228 airway epithelial cells obtained by bronchoscopy and brushing
## organism_ch1 characteristics_ch1 characteristics_ch1.1
## GSM190150 Homo sapiens age: 34 sex: M
## GSM190153 Homo sapiens age: 45 sex: F
## GSM254157 Homo sapiens age: 45 sex: M
## GSM298223 Homo sapiens age: 38 sex: M
## GSM298227 Homo sapiens age: 29 sex: M
## GSM298228 Homo sapiens age: 39 sex: F
## characteristics_ch1.2 characteristics_ch1.3
## GSM190150 ethnic group: black smoking status: non-smoker
## GSM190153 ethnic group: hispanic smoking status: non-smoker
## GSM254157 ethnic group: white smoking status: smoker, 23 pack-years
## GSM298223 ethnic group: hispanic smoking status: non-smoker
## GSM298227 ethnic group: hispanic smoking status: non-smoker
## GSM298228 ethnic group: asian smoking status: non-smoker
## molecule_ch1
## GSM190150 total RNA
## GSM190153 total RNA
## GSM254157 total RNA
## GSM298223 total RNA
## GSM298227 total RNA
## GSM298228 total RNA
## extract_protocol_ch1
## GSM190150 Trizol extraction and RNAeasy clean-up of total RNA was performed according to the manufacturer's instructions.
## GSM190153 Trizol extraction and RNAeasy clean-up of total RNA was performed according to the manufacturer's instructions.
## GSM254157 Trizol extraction and RNAeasy clean-up of total RNA was performed according to the manufacturer's instructions.
## GSM298223 Trizol extraction and RNAeasy clean-up of total RNA was performed according to the manufacturer's instructions.
## GSM298227 Trizol extraction and RNAeasy clean-up of total RNA was performed according to the manufacturer's instructions.
## GSM298228 Trizol extraction and RNAeasy clean-up of total RNA was performed according to the manufacturer's instructions.
## label_ch1
## GSM190150 biotin
## GSM190153 biotin
## GSM254157 biotin
## GSM298223 biotin
## GSM298227 biotin
## GSM298228 biotin
## label_protocol_ch1
## GSM190150 Biotinylated cRNA were prepared according to the standard Affymetrix protocol from 3 microg total RNA (Expression Analysis Technical Manual, 701022 Rev.2, Affymetrix).
## GSM190153 Biotinylated cRNA were prepared according to the standard Affymetrix protocol from 3 microg total RNA (Expression Analysis Technical Manual, 701022 Rev.2, Affymetrix).
## GSM254157 Biotinylated cRNA were prepared according to the standard Affymetrix protocol from 1-2 microg total RNA (Expression Analysis Technical Manual, 701022 Rev.2, Affymetrix).
## GSM298223 Biotinylated cRNA were prepared according to the standard Affymetrix protocol from 3 microg total RNA (Expression Analysis Technical Manual, 701022 Rev.2, Affymetrix).
## GSM298227 Biotinylated cRNA were prepared according to the standard Affymetrix protocol from 1-2 microg total RNA (Expression Analysis Technical Manual, 701022 Rev.2, Affymetrix).
## GSM298228 Biotinylated cRNA were prepared according to the standard Affymetrix protocol from 1-2 microg total RNA (Expression Analysis Technical Manual, 701022 Rev.2, Affymetrix).
## taxid_ch1
## GSM190150 9606
## GSM190153 9606
## GSM254157 9606
## GSM298223 9606
## GSM298227 9606
## GSM298228 9606
## hyb_protocol
## GSM190150 Following fragmentation, 15 microg of cRNA were hybridized for 16 hr at 45C on GeneChip HG-U133 Plus 2.0. GeneChips were washed and stained in the Affymetrix Fluidics Station 450.
## GSM190153 Following fragmentation, 15 microg of cRNA were hybridized for 16 hr at 45C on GeneChip HG-U133 Plus 2.0. GeneChips were washed and stained in the Affymetrix Fluidics Station 450.
## GSM254157 Following fragmentation, 10 microg of cRNA were hybridized for 16 hr at 45C on GeneChip HG-U133 Plus 2.0. GeneChips were washed and stained in the Affymetrix Fluidics Station 450.
## GSM298223 Following fragmentation, 15 microg of cRNA were hybridized for 16 hr at 45C on GeneChip HG-U133 Plus 2.0. GeneChips were washed and stained in the Affymetrix Fluidics Station 450.
## GSM298227 Following fragmentation, 10 microg of cRNA were hybridized for 16 hr at 45C on GeneChip HG-U133 Plus 2.0. GeneChips were washed and stained in the Affymetrix Fluidics Station 450.
## GSM298228 Following fragmentation, 10 microg of cRNA were hybridized for 16 hr at 45C on GeneChip HG-U133 Plus 2.0. GeneChips were washed and stained in the Affymetrix Fluidics Station 450.
## scan_protocol
## GSM190150 GeneChips were scanned using the GeneChip Scanner 3000 7G.
## GSM190153 GeneChips were scanned using the GeneChip Scanner 3000 7G.
## GSM254157 GeneChips were scanned using the GeneChip Scanner 3000 7G.
## GSM298223 GeneChips were scanned using the GeneChip Scanner 3000 7G.
## GSM298227 GeneChips were scanned using the GeneChip Scanner 3000 7G.
## GSM298228 GeneChips were scanned using the GeneChip Scanner 3000 7G.
## description
## GSM190150 small airways, non-smoker
## GSM190153 small airways, non-smoker
## GSM254157 small airways, smoker 112
## GSM298223 none
## GSM298227 none
## GSM298228 none
## data_processing
## GSM190150 The data were analyzed with Microarray Suite version 5.0 (MAS 5.0) using Affymetrix default analysis settings and global scaling as normalization method.
## GSM190153 The data were analyzed with Microarray Suite version 5.0 (MAS 5.0) using Affymetrix default analysis settings and global scaling as normalization method.
## GSM254157 The data were analyzed with Microarray Suite version 5.0 (MAS 5.0) using Affymetrix default analysis settings and global scaling as normalization method.
## GSM298223 The data were analyzed with Microarray Suite version 5.0 (MAS 5.0) using Affymetrix default analysis settings and global scaling as normalization method.
## GSM298227 The data were analyzed with Microarray Suite version 5.0 (MAS 5.0) using Affymetrix default analysis settings and global scaling as normalization method.
## GSM298228 The data were analyzed with Microarray Suite version 5.0 (MAS 5.0) using Affymetrix default analysis settings and global scaling as normalization method.
## platform_id contact_name contact_email
## GSM190150 GPL570 Yael,,Strulovici-Barel yas2003@med.cornell.edu
## GSM190153 GPL570 Yael,,Strulovici-Barel yas2003@med.cornell.edu
## GSM254157 GPL570 Yael,,Strulovici-Barel yas2003@med.cornell.edu
## GSM298223 GPL570 Yael,,Strulovici-Barel yas2003@med.cornell.edu
## GSM298227 GPL570 Yael,,Strulovici-Barel yas2003@med.cornell.edu
## GSM298228 GPL570 Yael,,Strulovici-Barel yas2003@med.cornell.edu
## contact_laboratory contact_department
## GSM190150 Crystal Department of Genetic Medicine
## GSM190153 Crystal Department of Genetic Medicine
## GSM254157 Crystal Department of Genetic Medicine
## GSM298223 Crystal Department of Genetic Medicine
## GSM298227 Crystal Department of Genetic Medicine
## GSM298228 Crystal Department of Genetic Medicine
## contact_institute contact_address contact_city
## GSM190150 Weill Cornell Medical College 1300 York Avenue New York
## GSM190153 Weill Cornell Medical College 1300 York Avenue New York
## GSM254157 Weill Cornell Medical College 1300 York Avenue New York
## GSM298223 Weill Cornell Medical College 1300 York Avenue New York
## GSM298227 Weill Cornell Medical College 1300 York Avenue New York
## GSM298228 Weill Cornell Medical College 1300 York Avenue New York
## contact_state contact_zip/postal_code contact_country
## GSM190150 NY 10021 USA
## GSM190153 NY 10021 USA
## GSM254157 NY 10021 USA
## GSM298223 NY 10021 USA
## GSM298227 NY 10021 USA
## GSM298228 NY 10021 USA
## supplementary_file
## GSM190150 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM190nnn/GSM190150/suppl/GSM190150.CEL.gz
## GSM190153 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM190nnn/GSM190153/suppl/GSM190153.CEL.gz
## GSM254157 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM254nnn/GSM254157/suppl/GSM254157.CEL.gz
## GSM298223 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM298nnn/GSM298223/suppl/GSM298223.CEL.gz
## GSM298227 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM298nnn/GSM298227/suppl/GSM298227.CEL.gz
## GSM298228 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM298nnn/GSM298228/suppl/GSM298228.CEL.gz
## supplementary_file.1
## GSM190150 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM190nnn/GSM190150/suppl/GSM190150.CHP.gz
## GSM190153 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM190nnn/GSM190153/suppl/GSM190153.CHP.gz
## GSM254157 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM254nnn/GSM254157/suppl/GSM254157.CHP.gz
## GSM298223 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM298nnn/GSM298223/suppl/GSM298223.CHP.gz
## GSM298227 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM298nnn/GSM298227/suppl/GSM298227.CHP.gz
## GSM298228 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM298nnn/GSM298228/suppl/GSM298228.CHP.gz
## data_row_count relation relation.1
## GSM190150 54675 Reanalyzed by: GSE119087
## GSM190153 54675 Reanalyzed by: GSE60486 Reanalyzed by: GSE119087
## GSM254157 54675 Reanalyzed by: GSE60486 Reanalyzed by: GSE119087
## GSM298223 54675
## GSM298227 54675 Reanalyzed by: GSE119087
## GSM298228 54675 Reanalyzed by: GSE119087
## age:ch1 cilia length:ch1 copd status:ch1
## GSM190150 34 <NA> <NA>
## GSM190153 45 <NA> <NA>
## GSM254157 45 <NA> <NA>
## GSM298223 38 <NA> <NA>
## GSM298227 29 <NA> <NA>
## GSM298228 39 <NA> <NA>
## department of genetic medicine id:ch1 ethnic group:ch1 ethnicity:ch1
## GSM190150 <NA> black <NA>
## GSM190153 <NA> hispanic <NA>
## GSM254157 <NA> white <NA>
## GSM298223 <NA> hispanic <NA>
## GSM298227 <NA> hispanic <NA>
## GSM298228 <NA> asian <NA>
## serum 25-oh-d:ch1 sex:ch1 smoking status:ch1 group
## GSM190150 <NA> M non-smoker non.smoker
## GSM190153 <NA> F non-smoker non.smoker
## GSM254157 <NA> M smoker, 23 pack-years smoker
## GSM298223 <NA> M non-smoker non.smoker
## GSM298227 <NA> M non-smoker non.smoker
## GSM298228 <NA> F non-smoker non.smoker
# Features worth keeping: submission/update dates, sex, ethnicity, smoking
# status, plus the raw "characteristics" columns they have to be parsed from.
# List of column names I want to keep and clean up into usable labels
columns_to_find <- c(
  "geo_accession", "status", "submission_date", "last_update_date",
  "characteristics_ch1", "characteristics_ch1.1", "characteristics_ch1.2",
  "characteristics_ch1.3", "age:ch1", "cilia length:ch1", "ethnic group:ch1",
  "ethnicity:ch1", "serum 25-oh-d:ch1", "sex:ch1", "smoking status:ch1",
  "group"
)
# Get the column indexes, by name
indexes <- match(columns_to_find, names(phenotypic_data))
names(indexes) <- columns_to_find
# Later steps address these columns by position, so a missing column must stop
# the script instead of silently shifting everything after it.
if (anyNA(indexes)) {
  stop(
    "Expected phenotype column(s) not found: ",
    paste(columns_to_find[is.na(indexes)], collapse = ", "),
    call. = FALSE
  )
}
phenotypic_data <- phenotypic_data[, indexes, drop = FALSE]
# Now I need to parse out sex, ethnicity, smoking status, and age, vitamin D, pack years.
# Rename "group" as "smoking_status" (by name rather than by position)
names(phenotypic_data)[names(phenotypic_data) == "group"] <- "smoking_status"
## Extract ethnicity, whichever column it was recorded in ##
# Start from an all-NA "ethnicity" column
phenotypic_data$ethnicity <- NA
# Given one sample's row, return every entry containing "eth"; NA if none does
find_ethnicity <- function(row) {
  hits <- grep("eth", row)
  if (length(hits) == 0) {
    return(NA)
  }
  row[hits]
}
# Run it over every row to fill the "ethnicity" column
phenotypic_data$ethnicity <- apply(phenotypic_data, MARGIN = 1, FUN = find_ethnicity)
## Extract sex, whichever column it was recorded in ##
# Start from an all-NA "sex" column
phenotypic_data$sex <- NA
# Given one sample's row, return every entry containing "sex"; NA if none does
find_sex <- function(row) {
  matching <- grep("sex", row)
  if (length(matching) > 0) row[matching] else NA
}
# Run it over every row to fill the "sex" column
phenotypic_data$sex <- apply(phenotypic_data, MARGIN = 1, FUN = find_sex)
## Extract pack-years, whichever column it was recorded in ##
# Start from an all-NA "pack_years" column
phenotypic_data$pack_years <- NA
# Given one sample's row, return the first entry containing "pack"; NA if none
find_pack_years <- function(row) {
  hits <- grep("pack", row)
  if (length(hits) == 0) {
    return(NA)
  }
  first_hit <- hits[1]
  row[first_hit]
}
# Run it over every row to fill the "pack_years" column
phenotypic_data$pack_years <- apply(phenotypic_data, MARGIN = 1, FUN = find_pack_years)
# Flatten the column in case it came back as a list
phenotypic_data$pack_years <- unlist(phenotypic_data$pack_years)
## Extract age, whichever column it was recorded in ##
# Start from an all-NA "age" column
phenotypic_data$age <- NA
# Given one sample's row, return every entry containing "age"; NA if none does
find_age <- function(row) {
  age_hits <- grep("age", row)
  found_any <- length(age_hits) > 0
  if (!found_any) {
    return(NA)
  }
  row[age_hits]
}
# Run it over every row to fill the "age" column
phenotypic_data$age <- apply(phenotypic_data, MARGIN = 1, FUN = find_age)
## Grabbing vitamin_d values from the columns ##
# This section used to appear twice, verbatim. Running it a second time resets
# the column to NA and recomputes the same values, so a single copy is enough.
# Initialize a new column "vitamin_d" with NA values
phenotypic_data$vitamin_d <- NA
# Given one sample's row, return the first entry containing "vitamin"; NA if none.
# NOTE(review): the source column kept earlier is named "serum 25-oh-d:ch1";
# confirm that the raw values really contain the word "vitamin", otherwise this
# column stays all NA.
find_vitamin_d <- function(row) {
  vitamin_d_column <- which(grepl("vitamin", row))
  if (length(vitamin_d_column) > 0) {
    return(row[vitamin_d_column[1]])
  }
  NA
}
# Apply the function row-wise to populate the "vitamin_d" column
phenotypic_data$vitamin_d <- apply(phenotypic_data, 1, find_vitamin_d)
## Extract cilia length, whichever column it was recorded in ##
# Start from an all-NA "cilia_length" column
phenotypic_data$cilia_length <- NA
# Given one sample's row, return the first entry containing "cilia"; NA if none
find_cilia <- function(row) {
  hits <- grep("cilia", row)
  if (length(hits) > 0) row[hits[1]] else NA
}
# Run it over every row to fill the "cilia_length" column
phenotypic_data$cilia_length <- apply(phenotypic_data, MARGIN = 1, FUN = find_cilia)
## Now cut out the messy columns
# Drop the raw source columns by name instead of by position (formerly
# columns 5 to 15), so the step stays correct if the column layout changes.
raw_columns <- c(
  "characteristics_ch1", "characteristics_ch1.1", "characteristics_ch1.2",
  "characteristics_ch1.3", "age:ch1", "cilia length:ch1", "ethnic group:ch1",
  "ethnicity:ch1", "serum 25-oh-d:ch1", "sex:ch1", "smoking status:ch1"
)
phenotypic_data <- phenotypic_data[
  , !(names(phenotypic_data) %in% raw_columns),
  drop = FALSE
]
## Remove unnecessary prefix info
# Entries look like "key: value"; keep only what follows the last ": ".
for (prefixed_column in c("ethnicity", "age", "sex", "vitamin_d", "cilia_length")) {
  phenotypic_data[[prefixed_column]] <-
    gsub(".*: ", "", phenotypic_data[[prefixed_column]])
}
# Pack-years entries look like "... smoker, 23 pack-years": keep what follows
# the last ", " and strip the unit. A trailing space remains after the number.
phenotypic_data$pack_years <- gsub(".*, ", "", phenotypic_data$pack_years)
phenotypic_data$pack_years <- gsub("pack-years", "", phenotypic_data$pack_years)
# Reformat the submission dates to be sortable
# GEO reports dates as "Mon DD YYYY" (e.g. "May 17 2007"). Convert every such
# value to an ISO "YYYY-MM-DD" string so that plain string comparison orders
# them chronologically. The previous hard-coded lookup only covered eight dates
# and mapped "Jan 03 2008" to "2008-01-08" by mistake.
# month.abb (base R) is used instead of as.Date("%b") so the conversion does
# not depend on the session locale. Values in any other format are left as is.
raw_submission_dates <- as.character(phenotypic_data$submission_date)
submission_month <- match(substr(raw_submission_dates, 1, 3), month.abb)
is_geo_date <- grepl("^[A-Z][a-z]{2} [0-9]{2} [0-9]{4}$", raw_submission_dates) &
  !is.na(submission_month)
phenotypic_data$submission_date <- ifelse(
  is_geo_date,
  sprintf(
    "%s-%02d-%s",
    substr(raw_submission_dates, 8, 11), # year
    submission_month,                    # month number
    substr(raw_submission_dates, 5, 6)   # day (already zero-padded)
  ),
  raw_submission_dates
)
# Rebuild the two-level group factor from the "0"/"1" sample codes and store it
# on gset (same assignment as performed right after the log2 transform).
groups <- make.names(c("non-smoker", "smoker"))
gs <- factor(sml, labels = groups)
gset$group <- gs
## Plot PCA 1 ##
# Colour code per sample = level index of its group
# (1 = first level of gs, 2 = second level)
colz <- as.numeric(gs)
plotMDS(exprs(gset),
  gene.selection = "common",
  main = "PCA for CS vs NS GSE63127",
  col = colz,
  pch = 1
)
# Legend colours are indexed by level, in the same order as the labels.
# unique(colz) only lined up with levels(gs) when the first sample happened to
# belong to the first level.
legend("bottom",
  legend = levels(gs),
  fill = seq_along(levels(gs)),
  title = "Smoking status"
)
## We have 4 definite clusters that are not based on smoking status.
## As such, it is a good idea to check the table of sample phenotypic information to look for sources of variation between samples.
# Point shape from the submission date: 2 (triangle) = submitted on or before
# 2010-06-03, 1 (circle) = submitted later. The mapping is written out
# explicitly instead of relying on factor level order, which would shift if
# only one of the two groups were present.
# Assumes submission_date already holds ISO "YYYY-MM-DD" strings, so that
# string comparison is chronological.
pointz <- ifelse(phenotypic_data$submission_date <= "2010-06-03", 2, 1)
## Plot PCA with date information ##
plotMDS(exprs(gset),
  gene.selection = "common",
  main = "PCA for CS vs NS GSE63127",
  col = colz, # palette colour 2 = smokers, 1 = non-smokers
  pch = pointz
  # labels = gset$group
)
# Legend colours are palette indices (2 and 1), the same codes the points are
# drawn with. The literal "red" is not palette colour 2 under the default
# palette of R >= 4.0.
legend("bottom",
  legend = c(
    "Smokers", "Nonsmokers",
    "2010 and Prior", "Post-2010"
  ),
  col = c(2, 1, 1, 1), # colour encodes smoking status only
  pch = c(15, 15, 2, 1), # shapes: 2 = triangle, 1 = circle
  pt.cex = c(1, 1, 1, 1),
  text.col = "black"
  # bty = "n" # box type: "n" removes the border
)
# Clearly the source of batch effect in PC1 is submission date post-2010.
# Note: I found that the split was at 2010 by doing a bit of playing around with other clustering methods, not shown here.
# First batch correction (submission date)
library(sva)
## Loading required package: mgcv
## Loading required package: nlme
## Warning: package 'nlme' was built under R version 4.3.3
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
## Loading required package: BiocParallel
library(limma)
# Making a batch vector: 1 = submitted before 2012, 2 = submitted later.
# Parse the date strings explicitly instead of comparing character values
# against a Date, and stop if any value is not in YYYY-MM-DD form: an
# unparsed date would otherwise end up as an NA batch label.
submission_dates <- as.Date(phenotypic_data$submission_date, format = "%Y-%m-%d")
if (anyNA(submission_dates)) {
  stop(
    "submission_date contains values that are not in YYYY-MM-DD form; ",
    "cannot assign submission batches",
    call. = FALSE
  )
}
submission_post_2010_batch <- ifelse(submission_dates < as.Date("2012-01-01"), 1, 2)
# Adjust the expression matrix for submission date batch effect
# NOTE(review): mod = NULL means no covariates are handed to ComBat, so the
# smoking group is not declared as a variable of interest here. Confirm this
# is intended.
exprs_matrix_combat <- ComBat(
  dat = exprs(gset),
  batch = submission_post_2010_batch,
  mod = NULL,
  par.prior = TRUE,
  prior.plots = FALSE
)
## Found2batches
## Adjusting for0covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
## Plot PCA for expression values after first batch correction ##
# The returned object is kept because its $x coordinates are used further down
# to define the second batch split.
date_corrected_PCA <- plotMDS(exprs_matrix_combat,
  gene.selection = "common",
  main = "PCA for CS vs NS GSE63127, corrected for submission date",
  col = colz, # palette colour 2 = smokers, 1 = non-smokers
  pch = pointz
)
## Some evidence that second source of variation could be due to sex (but only 11/182 samples have sex labels):
# Same plot, but each sample is drawn as its sex label instead of a symbol
plotMDS(exprs_matrix_combat,
  gene.selection = "common",
  main = "PCA for CS vs NS GSE63127, corrected for submission date",
  col = colz, # palette colour 2 = smokers, 1 = non-smokers
  # pch = pointz2 # Using separate shapes for all submission dates
  labels = phenotypic_data$sex
)
# Legend colours are palette indices, the same codes the labels are drawn with.
# The literal "red" is not palette colour 2 under the default palette of
# R >= 4.0.
legend("bottom",
  legend = c("M = Male", "F = Female", "Smoker", "Nonsmoker"),
  col = c(1, 1, 2, 1),
  pch = c(NA, NA, 15, 15) # no symbol for the two text-label entries
  # title = "Smoking status"
)
## Samples are divided by sex, but 11/182 samples is not enough to draw a conclusion here.
## Second correction for unknown source of variation using ComBat: ##
# Assign batch labels based on the first dimension from MDS (equivalent to PC1), since the dividing line for the batches lies at 0
# NOTE(review): these batch labels are derived from the expression data itself
# and ComBat is again run with mod = NULL (no covariates). Confirm that this
# does not remove or distort smoking-related signal.
unknown_batch_labels <- ifelse(date_corrected_PCA$x < 0, 1, 2)
# Do a second batch correction
exprs_matrix_combat_2 <- ComBat(
  dat = exprs_matrix_combat,
  batch = unknown_batch_labels,
  mod = NULL,
  par.prior = TRUE,
  prior.plots = FALSE
)
## Found2batches
## Adjusting for0covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
# View PCA plot
plotMDS(exprs_matrix_combat_2,
  gene.selection = "common",
  main = "PCA for CS vs NS GSE63127 after 2 ComBat corrections",
  col = colz, # palette colour 2 = smokers, 1 = non-smokers
  pch = pointz
  # labels = gset$group
)
# Legend colours are palette indices, the same codes the points are drawn with.
# The literal "red" is not palette colour 2 under the default palette of
# R >= 4.0. The trailing empty argument of the original call is removed.
legend("topleft",
  legend = c(
    "Smokers", "Nonsmokers",
    "2010 and Prior", "Post-2010"
  ),
  col = c(2, 1, 1, 1), # colour encodes smoking status only
  pch = c(15, 15, 2, 1), # shapes: 2 = triangle, 1 = circle
  pt.cex = c(1, 1, 1, 1),
  text.col = "black"
  # bty = "n"
)
## Now PC1 corresponds quite well to smoking status after the two ComBat corrections.
# Finish setting up the design matrix: one column per group, no intercept
design <- model.matrix(~group + 0, gset)
colnames(design) <- levels(gs)
## Crucial bit: Replace the expression values in gset with the batch corrected ones ##
exprs(gset) <- as.matrix(exprs_matrix_combat_2)
# calculate precision weights and show plot of mean-variance trend
# (TRUE is spelled out: T is an ordinary variable that can be reassigned)
v <- vooma(gset, design, plot = TRUE)
# OR weights by group
# v <- voomaByGroup(gset, group=groups, design, plot=T, cex=0.1, pch=".", col=1:nlevels(gs))
v$genes <- fData(gset) # attach gene annotations
# fit linear model
fit <- lmFit(v)
# set up contrasts of interest and recalculate model coefficients
# (second group minus first group, i.e. smoker - non.smoker)
cts <- paste(groups[2], groups[1], sep = "-")
cont.matrix <- makeContrasts(contrasts = cts, levels = design)
fit2 <- contrasts.fit(fit, cont.matrix)
# compute statistics and table of top significant genes
fit2 <- eBayes(fit2, proportion = 0.01)
# adjust.method is written in full instead of relying on partial matching of
# "adjust"; number = Inf returns every probe.
tT <- topTable(fit2, adjust.method = "fdr", sort.by = "B", number = Inf)
tT <- subset(tT, select = c("ID", "Gene.symbol", "logFC", "adj.P.Val"))
# Build the gene-level table: drop probes without a gene symbol and, for
# probesets mapping to the same gene, keep the one with the lowest FDR.
# Ties are kept for now and checked below. No FDR cutoff is applied yet.
GSE63127_CS_NS_GEO2R_limma_all <- tT %>%
  filter(Gene.symbol != "") %>%
  group_by(Gene.symbol) %>%
  slice_min(order_by = adj.P.Val, with_ties = TRUE) %>%
  ungroup()
head(GSE63127_CS_NS_GEO2R_limma_all)
## # A tibble: 6 × 4
## ID Gene.symbol logFC adj.P.Val
## <chr> <chr> <dbl> <dbl>
## 1 229819_at A1BG -0.106 0.481
## 2 232462_s_at A1BG-AS1 0.531 0.0224
## 3 220951_s_at A1CF 0.302 0.123
## 4 1558450_at A2M 0.110 0.453
## 5 1564139_at A2M-AS1 -0.138 0.0724
## 6 1553505_at A2ML1 0.145 0.636
# Significant subset: keep genes with FDR <= 0.05
GSE63127_CS_NS_GEO2R_limma_sig <- filter(
  GSE63127_CS_NS_GEO2R_limma_all,
  adj.P.Val <= 0.05
)
head(GSE63127_CS_NS_GEO2R_limma_sig)
## # A tibble: 6 × 4
## ID Gene.symbol logFC adj.P.Val
## <chr> <chr> <dbl> <dbl>
## 1 232462_s_at A1BG-AS1 0.531 2.24e- 2
## 2 218434_s_at AACS 0.128 2.82e- 3
## 3 223593_at AADAT -0.614 9.30e-19
## 4 202852_s_at AAGAB 0.179 3.13e- 3
## 5 225522_at AAK1 0.344 3.86e-15
## 6 220268_at AAMDC 0.657 1.81e- 2
# Checking for ties: any gene symbol that still occurs more than once among
# the significant genes
ties <- GSE63127_CS_NS_GEO2R_limma_sig %>%
  filter(duplicated(Gene.symbol) | duplicated(Gene.symbol, fromLast = TRUE))
print(ties) # No ties
## # A tibble: 0 × 4
## # ℹ 4 variables: ID <chr>, Gene.symbol <chr>, logFC <dbl>, adj.P.Val <dbl>
nrow(GSE63127_CS_NS_GEO2R_limma_sig)
## [1] 7105
# Volcano plot of the A1 (GSE63127) results.
# |log2FC| threshold used for the vertical cutoff lines and for the caption count
log2FC_cutoff1 <- 0.2
v1 <- EnhancedVolcano::EnhancedVolcano(
# All gene-level results are plotted, not only the significant ones
toptable = GSE63127_CS_NS_GEO2R_limma_all,
lab = GSE63127_CS_NS_GEO2R_limma_all$Gene.symbol,
x = "logFC", # column holding the effect size
y = "adj.P.Val", # the y axis uses the FDR-adjusted p-value
# pCutoffCol = 'min_smoothed_fdr',
xlab = "log2FC",
ylab = "-log10(FDR)",
title = "A1 DEGs",
subtitle = paste0("log2FC cutoff: ", log2FC_cutoff1),
# Caption: number of significant genes (FDR <= 0.05 table) whose |logFC| exceeds the cutoff
caption = paste0("Total = ", nrow(GSE63127_CS_NS_GEO2R_limma_sig[abs(GSE63127_CS_NS_GEO2R_limma_sig$logFC)>log2FC_cutoff1,]), " significant DEGs above log2FC cutoff"),
col = c("grey30", "mediumpurple2", "royalblue", "orange2"),
legendPosition = "bottom",
labSize = 4,
max.overlaps = 5,
drawConnectors = TRUE,
arrowheads = FALSE,
pCutoff = 0.05, # same 0.05 threshold as the significant table
FCcutoff = log2FC_cutoff1,
gridlines.minor = FALSE,
gridlines.major = FALSE,
# NOTE(review): fixed x-axis limits; points outside [-3, 6] would not be shown - confirm none exist
xlim = c(-3, 6)
)
# Draw the plot
v1
## Warning: ggrepel: 4637 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Change date suffix as appropriate if changes are made
#write.table(GSE63127_CS_NS_GEO2R_limma_sig, "../2_Outputs/GSE63127_CS_NS_GEO2R_limma_sig_20241107.txt", sep = '\t')
2024/11/07: Comparing the DEGs list when quantile normalization is used vs not used:
# Significant-DEG table saved on 2024-10-16 (presumably the quantile-normalized
# run, going by the variable name -- confirm against that run's settings)
GSE63127_CS_NS_GEO2R_limma_sig_quantile <- read.table(
  file = "../2_Outputs/GSE63127_CS_NS_GEO2R_limma_sig_20241016.txt",
  header = TRUE
)
# Alias for the table computed in this session, used as the "no quantile" side
GSE63127_CS_NS_GEO2R_limma_sig_no_quantile <- GSE63127_CS_NS_GEO2R_limma_sig
# Compare results
library(VennDiagram)
## Loading required package: grid
## Loading required package: futile.logger
##
## Attaching package: 'futile.logger'
## The following object is masked from 'package:mgcv':
##
## scat
# Two-set Venn diagram of the gene symbols in each significant-DEG table.
# filename = NULL keeps the result as a grid object rather than an image file.
venn <- VennDiagram::venn.diagram(
  x = list(
    DEGs_no_quantile = GSE63127_CS_NS_GEO2R_limma_sig_no_quantile[["Gene.symbol"]],
    DEGs_quantile = GSE63127_CS_NS_GEO2R_limma_sig_quantile[["Gene.symbol"]]
  ),
  filename = NULL
)
# Draw the diagram on a fresh graphics page
grid::grid.newpage()
grid::grid.draw(venn)
The two lists agree quite well, and the list obtained without quantile normalization is larger. Quantile normalization may over-normalize the data and mask some real variation. The PCA plots still look good without quantile normalization, so I will go forward without it. I will have to apply the same choice to all the other airway datasets used in the meta-analysis.
library(TCGAbiolinks)
library(SummarizedExperiment)
## Warning: package 'SummarizedExperiment' was built under R version 4.3.2
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## Warning: package 'matrixStats' was built under R version 4.3.3
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:genefilter':
##
## rowSds, rowVars
## The following object is masked from 'package:dplyr':
##
## count
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## The following objects are masked from 'package:genefilter':
##
## rowSds, rowVars
## The following object is masked from 'package:Biobase':
##
## rowMedians
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.3.2
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:nlme':
##
## collapse
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.3.3
library(dplyr)
# Build the GDC query: TCGA-LUAD gene expression quantification from the
# "STAR - Counts" workflow, for primary tumor and solid tissue normal samples
query <- TCGAbiolinks::GDCquery(
  project = "TCGA-LUAD",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "STAR - Counts",
  sample.type = c("Primary Tumor", "Solid Tissue Normal")
)
## --------------------------------------
## o GDCquery: Searching in GDC database
## --------------------------------------
## Genome of reference: hg38
## --------------------------------------------
## oo Accessing GDC. This might take a while...
## --------------------------------------------
## ooo Project: TCGA-LUAD
## --------------------
## oo Filtering results
## --------------------
## ooo By data.type
## ooo By workflow.type
## ooo By sample.type
## ----------------
## oo Checking data
## ----------------
## ooo Checking if there are duplicated cases
## ooo Checking if there are results for the query
## -------------------
## o Preparing output
## -------------------
# Download the files matched by the query
TCGAbiolinks::GDCdownload(query = query)
## Downloading data for project TCGA-LUAD
## Of the 598 files for download 598 already exist.
## All samples have been already downloaded
# Read the downloaded files into one R object (presumably a
# SummarizedExperiment, given the library loaded above -- confirm).
# NOTE(review): the name `data` shadows utils::data(); kept as-is here.
data <- TCGAbiolinks::GDCprepare(query = query)
## | | 0% | |0.1672241% ~58 s remaining| |0.3344482% ~36 s remaining| |0.5016722% ~28 s remaining| |0.6688963% ~24 s remaining| |0.8361204% ~24 s remaining| |1.003344% ~22 s remaining | |1.170569% ~21 s remaining | |1.337793% ~20 s remaining | |1.505017% ~19 s remaining | |1.672241% ~18 s remaining | |1.839465% ~17 s remaining |= |2.006689% ~17 s remaining |= |2.173913% ~16 s remaining |= |2.341137% ~16 s remaining |= |2.508361% ~16 s remaining |= |2.675585% ~16 s remaining |= |2.842809% ~15 s remaining |= |3.010033% ~15 s remaining |= |3.177258% ~15 s remaining |= |3.344482% ~15 s remaining |= |3.511706% ~15 s remaining |= |3.67893% ~15 s remaining |== |3.846154% ~14 s remaining |== |4.013378% ~14 s remaining |== |4.180602% ~14 s remaining |== |4.347826% ~14 s remaining |== |4.51505% ~14 s remaining |== |4.682274% ~14 s remaining |== |4.849498% ~14 s remaining |== |5.016722% ~14 s remaining |== |5.183946% ~14 s remaining |== |5.351171% ~14 s remaining |== |5.518395% ~14 s remaining |== |5.685619% ~14 s remaining |=== |5.852843% ~14 s remaining |=== |6.020067% ~13 s remaining |=== |6.187291% ~13 s remaining |=== |6.354515% ~13 s remaining |=== |6.521739% ~13 s remaining |=== |6.688963% ~13 s remaining |=== |6.856187% ~13 s remaining |=== |7.023411% ~13 s remaining |=== |7.190635% ~13 s remaining |=== |7.35786% ~13 s remaining |=== |7.525084% ~13 s remaining |==== |7.692308% ~13 s remaining |==== |7.859532% ~13 s remaining |==== |8.026756% ~13 s remaining |==== |8.19398% ~13 s remaining |==== |8.361204% ~13 s remaining |==== |8.528428% ~13 s remaining |==== |8.695652% ~13 s remaining |==== |8.862876% ~13 s remaining |==== |9.0301% ~13 s remaining |==== |9.197324% ~13 s remaining |==== |9.364548% ~13 s remaining |==== |9.531773% ~13 s remaining |===== |9.698997% ~13 s remaining |===== |9.866221% ~13 s remaining |===== |10.03344% ~13 s remaining |===== |10.20067% ~13 s remaining |===== |10.36789% ~13 s remaining |===== |10.53512% ~12 s remaining |===== |10.70234% ~12 
s remaining |===== |10.86957% ~12 s remaining |===== |11.03679% ~12 s remaining |===== |11.20401% ~12 s remaining |===== |11.37124% ~12 s remaining |====== |11.53846% ~12 s remaining |====== |11.70569% ~12 s remaining |====== |11.87291% ~12 s remaining |====== |12.04013% ~12 s remaining |====== |12.20736% ~12 s remaining |====== |12.37458% ~12 s remaining |====== |12.54181% ~12 s remaining |====== |12.70903% ~12 s remaining |====== |12.87625% ~12 s remaining |====== |13.04348% ~12 s remaining |====== |13.2107% ~12 s remaining |====== |13.37793% ~12 s remaining |======= |13.54515% ~12 s remaining |======= |13.71237% ~12 s remaining |======= |13.8796% ~12 s remaining |======= |14.04682% ~12 s remaining |======= |14.21405% ~12 s remaining |======= |14.38127% ~12 s remaining |======= |14.54849% ~12 s remaining |======= |14.71572% ~12 s remaining |======= |14.88294% ~12 s remaining |======= |15.05017% ~12 s remaining |======= |15.21739% ~12 s remaining |======== |15.38462% ~12 s remaining |======== |15.55184% ~12 s remaining |======== |15.71906% ~12 s remaining |======== |15.88629% ~12 s remaining |======== |16.05351% ~12 s remaining |======== |16.22074% ~12 s remaining |======== |16.38796% ~12 s remaining |======== |16.55518% ~11 s remaining |======== |16.72241% ~11 s remaining |======== |16.88963% ~11 s remaining |======== |17.05686% ~11 s remaining |======== |17.22408% ~11 s remaining |========= |17.3913% ~11 s remaining |========= |17.55853% ~11 s remaining |========= |17.72575% ~11 s remaining |========= |17.89298% ~11 s remaining |========= |18.0602% ~11 s remaining |========= |18.22742% ~11 s remaining |========= |18.39465% ~11 s remaining |========= |18.56187% ~11 s remaining |========= |18.7291% ~11 s remaining |========= |18.89632% ~11 s remaining |========= |19.06355% ~11 s remaining |========== |19.23077% ~11 s remaining |========== |19.39799% ~11 s remaining |========== |19.56522% ~11 s remaining |========== |19.73244% ~11 s remaining |========== |19.89967% 
~11 s remaining |========== |20.06689% ~11 s remaining |========== |20.23411% ~11 s remaining |========== |20.40134% ~11 s remaining |========== |20.56856% ~11 s remaining |========== |20.73579% ~11 s remaining |========== |20.90301% ~11 s remaining |========== |21.07023% ~11 s remaining |=========== |21.23746% ~11 s remaining |=========== |21.40468% ~11 s remaining |=========== |21.57191% ~11 s remaining |=========== |21.73913% ~11 s remaining |=========== |21.90635% ~11 s remaining |=========== |22.07358% ~11 s remaining |=========== |22.2408% ~11 s remaining |=========== |22.40803% ~11 s remaining |=========== |22.57525% ~11 s remaining |=========== |22.74247% ~11 s remaining |=========== |22.9097% ~11 s remaining |============ |23.07692% ~11 s remaining |============ |23.24415% ~11 s remaining |============ |23.41137% ~10 s remaining |============ |23.5786% ~10 s remaining |============ |23.74582% ~10 s remaining |============ |23.91304% ~10 s remaining |============ |24.08027% ~10 s remaining |============ |24.24749% ~10 s remaining |============ |24.41472% ~10 s remaining |============ |24.58194% ~10 s remaining |============ |24.74916% ~10 s remaining |============ |24.91639% ~10 s remaining |============= |25.08361% ~10 s remaining |============= |25.25084% ~10 s remaining |============= |25.41806% ~10 s remaining |============= |25.58528% ~10 s remaining |============= |25.75251% ~10 s remaining |============= |25.91973% ~10 s remaining |============= |26.08696% ~10 s remaining |============= |26.25418% ~10 s remaining |============= |26.4214% ~10 s remaining |============= |26.58863% ~10 s remaining |============= |26.75585% ~10 s remaining |============== |26.92308% ~10 s remaining |============== |27.0903% ~10 s remaining |============== |27.25753% ~10 s remaining |============== |27.42475% ~10 s remaining |============== |27.59197% ~10 s remaining |============== |27.7592% ~10 s remaining |============== |27.92642% ~10 s remaining |============== 
|28.09365% ~10 s remaining |============== |28.26087% ~10 s remaining |============== |28.42809% ~10 s remaining |============== |28.59532% ~10 s remaining |============== |28.76254% ~10 s remaining |=============== |28.92977% ~10 s remaining |=============== |29.09699% ~10 s remaining |=============== |29.26421% ~10 s remaining |=============== |29.43144% ~10 s remaining |=============== |29.59866% ~10 s remaining |=============== |29.76589% ~10 s remaining |=============== |29.93311% ~10 s remaining |=============== |30.10033% ~10 s remaining |=============== |30.26756% ~10 s remaining |=============== |30.43478% ~10 s remaining |=============== |30.60201% ~9 s remaining |================ |30.76923% ~9 s remaining |================ |30.93645% ~9 s remaining |================ |31.10368% ~9 s remaining |================ |31.2709% ~9 s remaining |================ |31.43813% ~9 s remaining |================ |31.60535% ~9 s remaining |================ |31.77258% ~9 s remaining |================ |31.9398% ~9 s remaining |================ |32.10702% ~9 s remaining |================ |32.27425% ~9 s remaining |================ |32.44147% ~9 s remaining |================ |32.6087% ~9 s remaining |================= |32.77592% ~9 s remaining |================= |32.94314% ~9 s remaining |================= |33.11037% ~9 s remaining |================= |33.27759% ~9 s remaining |================= |33.44482% ~9 s remaining |================= |33.61204% ~9 s remaining |================= |33.77926% ~9 s remaining |================= |33.94649% ~9 s remaining |================= |34.11371% ~9 s remaining |================= |34.28094% ~9 s remaining |================= |34.44816% ~9 s remaining |================== |34.61538% ~9 s remaining |================== |34.78261% ~9 s remaining |================== |34.94983% ~9 s remaining |================== |35.11706% ~9 s remaining |================== |35.28428% ~9 s remaining |================== |35.45151% ~9 s remaining |================== 
|35.61873% ~9 s remaining |================== |35.78595% ~9 s remaining |================== |35.95318% ~9 s remaining |================== |36.1204% ~9 s remaining |================== |36.28763% ~9 s remaining |================== |36.45485% ~9 s remaining |=================== |36.62207% ~9 s remaining |=================== |36.7893% ~9 s remaining |=================== |36.95652% ~9 s remaining |=================== |37.12375% ~9 s remaining |=================== |37.29097% ~9 s remaining |=================== |37.45819% ~9 s remaining |=================== |37.62542% ~9 s remaining |=================== |37.79264% ~8 s remaining |=================== |37.95987% ~8 s remaining |=================== |38.12709% ~8 s remaining |=================== |38.29431% ~8 s remaining |==================== |38.46154% ~8 s remaining |==================== |38.62876% ~8 s remaining |==================== |38.79599% ~8 s remaining |==================== |38.96321% ~8 s remaining |==================== |39.13043% ~8 s remaining |==================== |39.29766% ~8 s remaining |==================== |39.46488% ~8 s remaining |==================== |39.63211% ~8 s remaining |==================== |39.79933% ~8 s remaining |==================== |39.96656% ~8 s remaining |==================== |40.13378% ~8 s remaining |==================== |40.301% ~8 s remaining |===================== |40.46823% ~8 s remaining |===================== |40.63545% ~8 s remaining |===================== |40.80268% ~8 s remaining |===================== |40.9699% ~8 s remaining |===================== |41.13712% ~8 s remaining |===================== |41.30435% ~8 s remaining |===================== |41.47157% ~8 s remaining |===================== |41.6388% ~8 s remaining |===================== |41.80602% ~8 s remaining |===================== |41.97324% ~8 s remaining |===================== |42.14047% ~8 s remaining |====================== |42.30769% ~8 s remaining |====================== |42.47492% ~8 s remaining 
|====================== |42.64214% ~8 s remaining |====================== |42.80936% ~8 s remaining |====================== |42.97659% ~8 s remaining |====================== |43.14381% ~8 s remaining |====================== |43.31104% ~8 s remaining |====================== |43.47826% ~8 s remaining |====================== |43.64548% ~8 s remaining |====================== |43.81271% ~8 s remaining |====================== |43.97993% ~8 s remaining |====================== |44.14716% ~8 s remaining |======================= |44.31438% ~8 s remaining |======================= |44.48161% ~8 s remaining |======================= |44.64883% ~8 s remaining |======================= |44.81605% ~8 s remaining |======================= |44.98328% ~7 s remaining |======================= |45.1505% ~7 s remaining |======================= |45.31773% ~7 s remaining |======================= |45.48495% ~7 s remaining |======================= |45.65217% ~7 s remaining |======================= |45.8194% ~7 s remaining |======================= |45.98662% ~7 s remaining |======================== |46.15385% ~7 s remaining |======================== |46.32107% ~7 s remaining |======================== |46.48829% ~7 s remaining |======================== |46.65552% ~7 s remaining |======================== |46.82274% ~7 s remaining |======================== |46.98997% ~7 s remaining |======================== |47.15719% ~7 s remaining |======================== |47.32441% ~7 s remaining |======================== |47.49164% ~7 s remaining |======================== |47.65886% ~7 s remaining |======================== |47.82609% ~7 s remaining |======================== |47.99331% ~7 s remaining |========================= |48.16054% ~7 s remaining |========================= |48.32776% ~7 s remaining |========================= |48.49498% ~7 s remaining |========================= |48.66221% ~7 s remaining |========================= |48.82943% ~7 s remaining |========================= |48.99666% ~7 s 
remaining |========================= |49.16388% ~7 s remaining |========================= |49.3311% ~7 s remaining |========================= |49.49833% ~7 s remaining |========================= |49.66555% ~7 s remaining |========================= |49.83278% ~7 s remaining |========================== | 50% ~7 s remaining |========================== |50.16722% ~7 s remaining |========================== |50.33445% ~7 s remaining |========================== |50.50167% ~7 s remaining |========================== |50.6689% ~7 s remaining |========================== |50.83612% ~7 s remaining |========================== |51.00334% ~7 s remaining |========================== |51.17057% ~7 s remaining |========================== |51.33779% ~7 s remaining |========================== |51.50502% ~7 s remaining |========================== |51.67224% ~7 s remaining |========================== |51.83946% ~7 s remaining |=========================== |52.00669% ~7 s remaining |=========================== |52.17391% ~7 s remaining |=========================== |52.34114% ~6 s remaining |=========================== |52.50836% ~6 s remaining |=========================== |52.67559% ~6 s remaining |=========================== |52.84281% ~6 s remaining |=========================== |53.01003% ~6 s remaining |=========================== |53.17726% ~6 s remaining |=========================== |53.34448% ~6 s remaining |=========================== |53.51171% ~6 s remaining |=========================== |53.67893% ~6 s remaining |============================ |53.84615% ~6 s remaining |============================ |54.01338% ~6 s remaining |============================ |54.1806% ~6 s remaining |============================ |54.34783% ~6 s remaining |============================ |54.51505% ~6 s remaining |============================ |54.68227% ~6 s remaining |============================ |54.8495% ~6 s remaining |============================ |55.01672% ~6 s remaining |============================ 
|55.18395% ~6 s remaining |============================ |55.35117% ~6 s remaining |============================ |55.51839% ~6 s remaining |============================ |55.68562% ~6 s remaining |============================= |55.85284% ~6 s remaining |============================= |56.02007% ~6 s remaining |============================= |56.18729% ~6 s remaining |============================= |56.35452% ~6 s remaining |============================= |56.52174% ~6 s remaining |============================= |56.68896% ~6 s remaining |============================= |56.85619% ~6 s remaining |============================= |57.02341% ~6 s remaining |============================= |57.19064% ~6 s remaining |============================= |57.35786% ~6 s remaining |============================= |57.52508% ~6 s remaining |============================= |57.69231% ~6 s remaining |============================== |57.85953% ~6 s remaining |============================== |58.02676% ~6 s remaining |============================== |58.19398% ~6 s remaining |============================== |58.3612% ~6 s remaining |============================== |58.52843% ~6 s remaining |============================== |58.69565% ~6 s remaining |============================== |58.86288% ~6 s remaining |============================== |59.0301% ~6 s remaining |============================== |59.19732% ~6 s remaining |============================== |59.36455% ~5 s remaining |============================== |59.53177% ~5 s remaining |=============================== |59.699% ~5 s remaining |=============================== |59.86622% ~5 s remaining |=============================== |60.03344% ~5 s remaining |=============================== |60.20067% ~5 s remaining |=============================== |60.36789% ~5 s remaining |=============================== |60.53512% ~5 s remaining |=============================== |60.70234% ~5 s remaining |=============================== |60.86957% ~5 s remaining 
|=============================== |61.03679% ~5 s remaining |=============================== |61.20401% ~5 s remaining |=============================== |61.37124% ~5 s remaining |================================ |61.53846% ~5 s remaining |================================ |61.70569% ~5 s remaining |================================ |61.87291% ~5 s remaining |================================ |62.04013% ~5 s remaining |================================ |62.20736% ~5 s remaining |================================ |62.37458% ~5 s remaining |================================ |62.54181% ~5 s remaining |================================ |62.70903% ~5 s remaining |================================ |62.87625% ~5 s remaining |================================ |63.04348% ~5 s remaining |================================ |63.2107% ~5 s remaining |================================ |63.37793% ~5 s remaining |================================= |63.54515% ~5 s remaining |================================= |63.71237% ~5 s remaining |================================= |63.8796% ~5 s remaining |================================= |64.04682% ~5 s remaining |================================= |64.21405% ~5 s remaining |================================= |64.38127% ~5 s remaining |================================= |64.54849% ~5 s remaining |================================= |64.71572% ~5 s remaining |================================= |64.88294% ~5 s remaining |================================= |65.05017% ~5 s remaining |================================= |65.21739% ~5 s remaining |================================== |65.38462% ~5 s remaining |================================== |65.55184% ~5 s remaining |================================== |65.71906% ~5 s remaining |================================== |65.88629% ~5 s remaining |================================== |66.05351% ~5 s remaining |================================== |66.22074% ~5 s remaining |================================== |66.38796% ~5 s remaining 
|================================== |66.55518% ~5 s remaining |================================== |66.72241% ~4 s remaining |================================== |66.88963% ~4 s remaining |================================== |67.05686% ~4 s remaining |================================== |67.22408% ~4 s remaining |=================================== |67.3913% ~4 s remaining |=================================== |67.55853% ~4 s remaining |=================================== |67.72575% ~4 s remaining |=================================== |67.89298% ~4 s remaining |=================================== |68.0602% ~4 s remaining |=================================== |68.22742% ~4 s remaining |=================================== |68.39465% ~4 s remaining |=================================== |68.56187% ~4 s remaining |=================================== |68.7291% ~4 s remaining |=================================== |68.89632% ~4 s remaining |=================================== |69.06355% ~4 s remaining |==================================== |69.23077% ~4 s remaining |==================================== |69.39799% ~4 s remaining |==================================== |69.56522% ~4 s remaining |==================================== |69.73244% ~4 s remaining |==================================== |69.89967% ~4 s remaining |==================================== |70.06689% ~4 s remaining |==================================== |70.23411% ~4 s remaining |==================================== |70.40134% ~4 s remaining |==================================== |70.56856% ~4 s remaining |==================================== |70.73579% ~4 s remaining |==================================== |70.90301% ~4 s remaining |==================================== |71.07023% ~4 s remaining |===================================== |71.23746% ~4 s remaining |===================================== |71.40468% ~4 s remaining |===================================== |71.57191% ~4 s remaining 
|===================================== |71.73913% ~4 s remaining |===================================== |71.90635% ~4 s remaining |===================================== |72.07358% ~4 s remaining |===================================== |72.2408% ~4 s remaining |===================================== |72.40803% ~4 s remaining |===================================== |72.57525% ~4 s remaining |===================================== |72.74247% ~4 s remaining |===================================== |72.9097% ~4 s remaining |====================================== |73.07692% ~4 s remaining |====================================== |73.24415% ~4 s remaining |====================================== |73.41137% ~4 s remaining |====================================== |73.5786% ~4 s remaining |====================================== |73.74582% ~4 s remaining |====================================== |73.91304% ~4 s remaining |====================================== |74.08027% ~4 s remaining |====================================== |74.24749% ~4 s remaining |====================================== |74.41472% ~4 s remaining |====================================== |74.58194% ~4 s remaining |====================================== |74.74916% ~3 s remaining |====================================== |74.91639% ~3 s remaining |======================================= |75.08361% ~3 s remaining |======================================= |75.25084% ~3 s remaining |======================================= |75.41806% ~3 s remaining |======================================= |75.58528% ~4 s remaining |======================================= |75.75251% ~4 s remaining |======================================= |75.91973% ~4 s remaining |======================================= |76.08696% ~4 s remaining |======================================= |76.25418% ~3 s remaining |======================================= |76.4214% ~3 s remaining |======================================= |76.58863% ~3 s remaining 
|======================================= |76.75585% ~3 s remaining |======================================== |76.92308% ~3 s remaining |======================================== |77.0903% ~3 s remaining |======================================== |77.25753% ~3 s remaining |======================================== |77.42475% ~3 s remaining |======================================== |77.59197% ~3 s remaining |======================================== |77.7592% ~3 s remaining |======================================== |77.92642% ~3 s remaining |======================================== |78.09365% ~3 s remaining |======================================== |78.26087% ~3 s remaining |======================================== |78.42809% ~3 s remaining |======================================== |78.59532% ~3 s remaining |======================================== |78.76254% ~3 s remaining |========================================= |78.92977% ~3 s remaining |========================================= |79.09699% ~3 s remaining |========================================= |79.26421% ~3 s remaining |========================================= |79.43144% ~3 s remaining |========================================= |79.59866% ~3 s remaining |========================================= |79.76589% ~3 s remaining |========================================= |79.93311% ~3 s remaining |========================================= |80.10033% ~3 s remaining |========================================= |80.26756% ~3 s remaining |========================================= |80.43478% ~3 s remaining |========================================= |80.60201% ~3 s remaining |========================================== |80.76923% ~3 s remaining |========================================== |80.93645% ~3 s remaining |========================================== |81.10368% ~3 s remaining |========================================== |81.2709% ~3 s remaining |========================================== |81.43813% ~3 s remaining 
|========================================== |81.60535% ~3 s remaining |========================================== |81.77258% ~3 s remaining |========================================== |81.9398% ~3 s remaining |========================================== |82.10702% ~3 s remaining |========================================== |82.27425% ~3 s remaining |========================================== |82.44147% ~3 s remaining |========================================== |82.6087% ~3 s remaining |=========================================== |82.77592% ~3 s remaining |=========================================== |82.94314% ~2 s remaining |=========================================== |83.11037% ~2 s remaining |=========================================== |83.27759% ~2 s remaining |=========================================== |83.44482% ~2 s remaining |=========================================== |83.61204% ~2 s remaining |=========================================== |83.77926% ~2 s remaining |=========================================== |83.94649% ~2 s remaining |=========================================== |84.11371% ~2 s remaining |=========================================== |84.28094% ~2 s remaining |=========================================== |84.44816% ~2 s remaining |============================================ |84.61538% ~2 s remaining |============================================ |84.78261% ~2 s remaining |============================================ |84.94983% ~2 s remaining |============================================ |85.11706% ~2 s remaining |============================================ |85.28428% ~2 s remaining |============================================ |85.45151% ~2 s remaining |============================================ |85.61873% ~2 s remaining |============================================ |85.78595% ~2 s remaining |============================================ |85.95318% ~2 s remaining |============================================ |86.1204% ~2 s remaining 
|============================================ |86.28763% ~2 s remaining |============================================ |86.45485% ~2 s remaining |============================================= |86.62207% ~2 s remaining |============================================= |86.7893% ~2 s remaining |============================================= |86.95652% ~2 s remaining |============================================= |87.12375% ~2 s remaining |============================================= |87.29097% ~2 s remaining |============================================= |87.45819% ~2 s remaining |============================================= |87.62542% ~2 s remaining |============================================= |87.79264% ~2 s remaining |============================================= |87.95987% ~2 s remaining |============================================= |88.12709% ~2 s remaining |============================================= |88.29431% ~2 s remaining |============================================== |88.46154% ~2 s remaining |============================================== |88.62876% ~2 s remaining |============================================== |88.79599% ~2 s remaining |============================================== |88.96321% ~2 s remaining |============================================== |89.13043% ~2 s remaining |============================================== |89.29766% ~2 s remaining |============================================== |89.46488% ~2 s remaining |============================================== |89.63211% ~1 s remaining |============================================== |89.79933% ~1 s remaining |============================================== |89.96656% ~1 s remaining |============================================== |90.13378% ~1 s remaining |============================================== |90.301% ~1 s remaining |=============================================== |90.46823% ~1 s remaining |=============================================== |90.63545% ~1 s remaining 
|=============================================== |90.80268% ~1 s remaining |=============================================== |90.9699% ~1 s remaining |=============================================== |91.13712% ~1 s remaining |=============================================== |91.30435% ~1 s remaining |=============================================== |91.47157% ~1 s remaining |=============================================== |91.6388% ~1 s remaining |=============================================== |91.80602% ~1 s remaining |=============================================== |91.97324% ~1 s remaining |=============================================== |92.14047% ~1 s remaining |================================================ |92.30769% ~1 s remaining |================================================ |92.47492% ~1 s remaining |================================================ |92.64214% ~1 s remaining |================================================ |92.80936% ~1 s remaining |================================================ |92.97659% ~1 s remaining |================================================ |93.14381% ~1 s remaining |================================================ |93.31104% ~1 s remaining |================================================ |93.47826% ~1 s remaining |================================================ |93.64548% ~1 s remaining |================================================ |93.81271% ~1 s remaining |================================================ |93.97993% ~1 s remaining |================================================ |94.14716% ~1 s remaining |================================================= |94.31438% ~1 s remaining |================================================= |94.48161% ~1 s remaining |================================================= |94.64883% ~1 s remaining |================================================= |94.81605% ~1 s remaining |================================================= |94.98328% ~1 s remaining 
|================================================= |95.1505% ~1 s remaining |================================================= |95.31773% ~1 s remaining |================================================= |95.48495% ~1 s remaining |================================================= |95.65217% ~1 s remaining |================================================= |95.8194% ~1 s remaining |================================================= |95.98662% ~1 s remaining |================================================== |96.15385% ~1 s remaining |================================================== |96.32107% ~1 s remaining |================================================== |96.48829% ~1 s remaining |================================================== |96.65552% ~1 s remaining |================================================== |96.82274% ~0 s remaining |================================================== |96.98997% ~0 s remaining |================================================== |97.15719% ~0 s remaining |================================================== |97.32441% ~0 s remaining |================================================== |97.49164% ~0 s remaining |================================================== |97.65886% ~0 s remaining |================================================== |97.82609% ~0 s remaining |================================================== |97.99331% ~0 s remaining |=================================================== |98.16054% ~0 s remaining |=================================================== |98.32776% ~0 s remaining |=================================================== |98.49498% ~0 s remaining |=================================================== |98.66221% ~0 s remaining |=================================================== |98.82943% ~0 s remaining |=================================================== |98.99666% ~0 s remaining |=================================================== |99.16388% ~0 s remaining 
|=================================================== |99.3311% ~0 s remaining |=================================================== |99.49833% ~0 s remaining |=================================================== |99.66555% ~0 s remaining |=================================================== |99.83278% ~0 s remaining |====================================================|100% ~0 s remaining |====================================================|100% Completed after 15 s
## Starting to add information to samples
## => Add clinical information to samples
## => Adding TCGA molecular information from marker papers
## => Information will have prefix 'paper_'
## luad subtype information from:doi:10.1038/nature13385
## Available assays in SummarizedExperiment :
## => unstranded
## => stranded_first
## => stranded_second
## => tpm_unstrand
## => fpkm_unstrand
## => fpkm_uq_unstrand
# Extract the gene-level count matrix from the SummarizedExperiment `data`
# as a data frame (rows = Ensembl gene IDs, columns = TCGA sample barcodes).
# The assay is requested by name instead of relying on assay()'s default of
# "first assay": "unstranded" is the first entry in the assay listing printed
# when `data` was prepared, so this selects the same matrix as before.
# NOTE(review): presumably these are the raw (un-normalized) counts -- confirm
# against the GDC / TCGAbiolinks documentation before using them as DE input.
counts <- as.data.frame(assay(data, "unstranded"))

# Preview the first genes for a handful of samples only. Calling head() on the
# full table prints six rows for every sample column, which floods the
# rendered report with hundreds of lines of output.
head(counts[, seq_len(min(6L, ncol(counts))), drop = FALSE])
## TCGA-73-4658-01A-01R-1755-07 TCGA-44-2661-11A-01R-1758-07
## ENSG00000000003.15 3659 1395
## ENSG00000000005.6 188 8
## ENSG00000000419.13 981 1031
## ENSG00000000457.14 456 541
## ENSG00000000460.17 158 135
## ENSG00000000938.13 1645 3245
## TCGA-55-6986-11A-01R-1949-07 TCGA-55-8615-01A-11R-2403-07
## ENSG00000000003.15 6760 2257
## ENSG00000000005.6 3 0
## ENSG00000000419.13 2070 644
## ENSG00000000457.14 1110 538
## ENSG00000000460.17 202 212
## ENSG00000000938.13 4876 616
## TCGA-97-8177-01A-11R-2287-07 TCGA-49-6744-11A-01R-1858-07
## ENSG00000000003.15 5009 1353
## ENSG00000000005.6 13 2
## ENSG00000000419.13 2731 842
## ENSG00000000457.14 919 417
## ENSG00000000460.17 321 106
## ENSG00000000938.13 2299 2699
## TCGA-67-3771-01A-01R-0946-07 TCGA-49-6744-01A-11R-1858-07
## ENSG00000000003.15 1053 2786
## ENSG00000000005.6 1 17
## ENSG00000000419.13 1817 1443
## ENSG00000000457.14 598 680
## ENSG00000000460.17 339 242
## ENSG00000000938.13 902 1458
## TCGA-MP-A4SW-01A-21R-A24X-07 TCGA-97-8176-01A-11R-2403-07
## ENSG00000000003.15 3440 3023
## ENSG00000000005.6 333 0
## ENSG00000000419.13 862 1067
## ENSG00000000457.14 717 579
## ENSG00000000460.17 267 370
## ENSG00000000938.13 1567 636
## TCGA-97-7552-01A-11R-2039-07 TCGA-55-A48Z-01A-12R-A24X-07
## ENSG00000000003.15 2475 6832
## ENSG00000000005.6 3 0
## ENSG00000000419.13 1276 1608
## ENSG00000000457.14 1158 978
## ENSG00000000460.17 193 409
## ENSG00000000938.13 1520 846
## TCGA-50-5944-01A-11R-1755-07 TCGA-MN-A4N5-01A-11R-A24X-07
## ENSG00000000003.15 3266 6122
## ENSG00000000005.6 2 0
## ENSG00000000419.13 1015 2714
## ENSG00000000457.14 626 1042
## ENSG00000000460.17 147 712
## ENSG00000000938.13 554 840
## TCGA-75-5146-01A-01R-1628-07 TCGA-97-7546-01A-11R-2039-07
## ENSG00000000003.15 2783 3447
## ENSG00000000005.6 1 15
## ENSG00000000419.13 1131 1736
## ENSG00000000457.14 672 870
## ENSG00000000460.17 173 393
## ENSG00000000938.13 398 808
## TCGA-55-7911-01A-11R-2170-07 TCGA-44-5643-01A-01R-1628-07
## ENSG00000000003.15 1237 2652
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1662 2261
## ENSG00000000457.14 671 327
## ENSG00000000460.17 435 368
## ENSG00000000938.13 611 455
## TCGA-86-8279-01A-11R-2287-07 TCGA-75-6212-01A-11R-1755-07
## ENSG00000000003.15 8462 2655
## ENSG00000000005.6 3 2
## ENSG00000000419.13 3576 574
## ENSG00000000457.14 1871 362
## ENSG00000000460.17 828 142
## ENSG00000000938.13 788 463
## TCGA-55-8299-01A-11R-2287-07 TCGA-83-5908-01A-21R-2287-07
## ENSG00000000003.15 2143 3101
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1452 2763
## ENSG00000000457.14 737 1255
## ENSG00000000460.17 430 1518
## ENSG00000000938.13 1304 1104
## TCGA-44-2655-01A-01R-0946-07 TCGA-50-6597-01A-11R-1858-07
## ENSG00000000003.15 7709 1599
## ENSG00000000005.6 4 0
## ENSG00000000419.13 2754 515
## ENSG00000000457.14 2163 295
## ENSG00000000460.17 499 21
## ENSG00000000938.13 1212 246
## TCGA-44-6776-11A-01R-1858-07 TCGA-44-6147-01B-06R-A277-07
## ENSG00000000003.15 1200 1002
## ENSG00000000005.6 2 10
## ENSG00000000419.13 909 284
## ENSG00000000457.14 373 549
## ENSG00000000460.17 70 497
## ENSG00000000938.13 2123 177
## TCGA-44-6776-01A-11R-1858-07 TCGA-55-8511-01A-11R-2403-07
## ENSG00000000003.15 1384 2376
## ENSG00000000005.6 1 0
## ENSG00000000419.13 642 1027
## ENSG00000000457.14 602 599
## ENSG00000000460.17 119 333
## ENSG00000000938.13 184 719
## TCGA-95-7948-01A-11R-2187-07 TCGA-97-8172-01A-11R-2287-07
## ENSG00000000003.15 3087 2219
## ENSG00000000005.6 0 5
## ENSG00000000419.13 2389 1622
## ENSG00000000457.14 1171 2548
## ENSG00000000460.17 282 427
## ENSG00000000938.13 240 1892
## TCGA-55-6979-01A-11R-1949-07 TCGA-55-6979-11A-01R-1949-07
## ENSG00000000003.15 3103 513
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1030 585
## ENSG00000000457.14 489 304
## ENSG00000000460.17 462 77
## ENSG00000000938.13 1242 3081
## TCGA-44-6146-11A-01R-1858-07 TCGA-50-5939-11A-01R-1628-07
## ENSG00000000003.15 548 829
## ENSG00000000005.6 1 6
## ENSG00000000419.13 722 829
## ENSG00000000457.14 403 334
## ENSG00000000460.17 73 93
## ENSG00000000938.13 1535 2289
## TCGA-05-4410-01A-21R-1858-07 TCGA-64-1677-01A-01R-0946-07
## ENSG00000000003.15 1583 2474
## ENSG00000000005.6 2 188
## ENSG00000000419.13 483 1754
## ENSG00000000457.14 597 278
## ENSG00000000460.17 183 204
## ENSG00000000938.13 598 305
## TCGA-78-7633-01A-11R-2066-07 TCGA-55-7724-01A-11R-2170-07
## ENSG00000000003.15 4673 1676
## ENSG00000000005.6 2 1
## ENSG00000000419.13 1715 856
## ENSG00000000457.14 1975 361
## ENSG00000000460.17 594 135
## ENSG00000000938.13 421 638
## TCGA-05-4405-01A-21R-1858-07 TCGA-J2-8194-01A-11R-2241-07
## ENSG00000000003.15 2438 6038
## ENSG00000000005.6 0 0
## ENSG00000000419.13 593 1470
## ENSG00000000457.14 701 1446
## ENSG00000000460.17 297 244
## ENSG00000000938.13 450 1059
## TCGA-44-8119-01A-11R-2241-07 TCGA-62-A471-01A-12R-A24H-07
## ENSG00000000003.15 6656 4275
## ENSG00000000005.6 0 0
## ENSG00000000419.13 3399 1071
## ENSG00000000457.14 1292 630
## ENSG00000000460.17 735 542
## ENSG00000000938.13 1325 372
## TCGA-86-8674-01A-21R-2403-07 TCGA-55-A48Y-01A-11R-A24H-07
## ENSG00000000003.15 2352 4118
## ENSG00000000005.6 1 0
## ENSG00000000419.13 911 1117
## ENSG00000000457.14 760 632
## ENSG00000000460.17 206 389
## ENSG00000000938.13 136 472
## TCGA-91-6829-01A-21R-1858-07 TCGA-91-6829-11A-01R-1858-07
## ENSG00000000003.15 2018 780
## ENSG00000000005.6 0 2
## ENSG00000000419.13 1252 789
## ENSG00000000457.14 556 406
## ENSG00000000460.17 316 146
## ENSG00000000938.13 420 10095
## TCGA-69-7763-01A-11R-2170-07 TCGA-64-5779-01A-01R-1628-07
## ENSG00000000003.15 1624 1526
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1588 1377
## ENSG00000000457.14 498 537
## ENSG00000000460.17 114 305
## ENSG00000000938.13 743 333
## TCGA-78-8640-01A-11R-2403-07 TCGA-64-1680-01A-02R-0946-07
## ENSG00000000003.15 997 4313
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1227 1337
## ENSG00000000457.14 852 591
## ENSG00000000460.17 527 144
## ENSG00000000938.13 1034 166
## TCGA-38-4626-11A-01R-1758-07 TCGA-55-6980-01A-11R-1949-07
## ENSG00000000003.15 2370 4407
## ENSG00000000005.6 1 2
## ENSG00000000419.13 2770 513
## ENSG00000000457.14 621 336
## ENSG00000000460.17 166 97
## ENSG00000000938.13 17257 576
## TCGA-55-6980-11A-01R-1949-07 TCGA-91-6835-11A-01R-1858-07
## ENSG00000000003.15 618 2510
## ENSG00000000005.6 0 4
## ENSG00000000419.13 570 881
## ENSG00000000457.14 346 506
## ENSG00000000460.17 78 96
## ENSG00000000938.13 1476 1062
## TCGA-86-8585-01A-11R-2403-07 TCGA-75-6211-01A-11R-1755-07
## ENSG00000000003.15 4460 3513
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1109 1436
## ENSG00000000457.14 411 492
## ENSG00000000460.17 283 363
## ENSG00000000938.13 2004 162
## TCGA-50-5066-01A-01R-1628-07 TCGA-78-7149-01A-11R-2039-07
## ENSG00000000003.15 3580 2351
## ENSG00000000005.6 1 3
## ENSG00000000419.13 2836 953
## ENSG00000000457.14 533 699
## ENSG00000000460.17 433 152
## ENSG00000000938.13 533 270
## TCGA-97-A4M2-01A-12R-A24X-07 TCGA-44-6777-01A-11R-1858-07
## ENSG00000000003.15 1459 1728
## ENSG00000000005.6 0 3
## ENSG00000000419.13 1031 781
## ENSG00000000457.14 861 490
## ENSG00000000460.17 187 218
## ENSG00000000938.13 6849 2347
## TCGA-67-3774-01A-01R-0946-07 TCGA-05-4426-01A-01R-1206-07
## ENSG00000000003.15 2698 9468
## ENSG00000000005.6 0 1
## ENSG00000000419.13 738 2420
## ENSG00000000457.14 611 1100
## ENSG00000000460.17 203 417
## ENSG00000000938.13 862 1515
## TCGA-55-6982-11A-01R-1949-07 TCGA-55-A4DF-01A-11R-A24H-07
## ENSG00000000003.15 950 3943
## ENSG00000000005.6 1 0
## ENSG00000000419.13 706 3162
## ENSG00000000457.14 475 726
## ENSG00000000460.17 87 603
## ENSG00000000938.13 2201 1578
## TCGA-97-7553-01A-21R-2039-07 TCGA-MP-A4TD-01A-32R-A262-07
## ENSG00000000003.15 5176 3071
## ENSG00000000005.6 8 0
## ENSG00000000419.13 1899 1155
## ENSG00000000457.14 856 660
## ENSG00000000460.17 441 362
## ENSG00000000938.13 4358 664
## TCGA-50-5930-01A-11R-1755-07 TCGA-95-A4VN-01A-11R-A262-07
## ENSG00000000003.15 1155 1959
## ENSG00000000005.6 0 0
## ENSG00000000419.13 460 1104
## ENSG00000000457.14 156 837
## ENSG00000000460.17 90 524
## ENSG00000000938.13 437 1645
## TCGA-44-3396-11A-01R-1758-07 TCGA-93-A4JO-01A-21R-A24X-07
## ENSG00000000003.15 2615 3541
## ENSG00000000005.6 1 0
## ENSG00000000419.13 1664 1982
## ENSG00000000457.14 858 854
## ENSG00000000460.17 172 431
## ENSG00000000938.13 9948 1763
## TCGA-49-AAR0-01A-21R-A39D-07 TCGA-67-4679-01B-01R-1755-07
## ENSG00000000003.15 1443 1655
## ENSG00000000005.6 1 2
## ENSG00000000419.13 1716 895
## ENSG00000000457.14 669 1100
## ENSG00000000460.17 302 199
## ENSG00000000938.13 2315 814
## TCGA-O1-A52J-01A-11R-A262-07 TCGA-05-4420-01A-01R-1206-07
## ENSG00000000003.15 2730 8815
## ENSG00000000005.6 0 2
## ENSG00000000419.13 1583 3824
## ENSG00000000457.14 804 1011
## ENSG00000000460.17 356 751
## ENSG00000000938.13 2852 920
## TCGA-55-6642-01A-11R-1858-07 TCGA-86-7953-01A-11R-2187-07
## ENSG00000000003.15 1672 4519
## ENSG00000000005.6 1 0
## ENSG00000000419.13 729 1322
## ENSG00000000457.14 476 911
## ENSG00000000460.17 206 1180
## ENSG00000000938.13 278 1796
## TCGA-55-6983-11A-01R-1949-07 TCGA-53-7624-01A-11R-2066-07
## ENSG00000000003.15 897 2020
## ENSG00000000005.6 1 1
## ENSG00000000419.13 735 1419
## ENSG00000000457.14 321 891
## ENSG00000000460.17 55 813
## ENSG00000000938.13 1984 322
## TCGA-97-A4M3-01A-11R-A24X-07 TCGA-80-5608-01A-31R-1949-07
## ENSG00000000003.15 1430 5351
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1196 1269
## ENSG00000000457.14 617 909
## ENSG00000000460.17 190 456
## ENSG00000000938.13 341 986
## TCGA-55-8620-01A-11R-2403-07 TCGA-62-8395-01A-11R-2326-07
## ENSG00000000003.15 1097 1548
## ENSG00000000005.6 1 0
## ENSG00000000419.13 1486 1239
## ENSG00000000457.14 200 681
## ENSG00000000460.17 215 184
## ENSG00000000938.13 633 522
## TCGA-75-6207-01A-11R-1755-07 TCGA-50-5068-01A-01R-1628-07
## ENSG00000000003.15 2473 2897
## ENSG00000000005.6 8 2
## ENSG00000000419.13 1801 3171
## ENSG00000000457.14 839 771
## ENSG00000000460.17 394 249
## ENSG00000000938.13 666 532
## TCGA-NJ-A4YF-01A-12R-A262-07 TCGA-44-6145-01A-11R-1755-07
## ENSG00000000003.15 1267 1398
## ENSG00000000005.6 0 0
## ENSG00000000419.13 3955 1287
## ENSG00000000457.14 362 656
## ENSG00000000460.17 210 303
## ENSG00000000938.13 492 739
## TCGA-05-4424-01A-22R-1858-07 TCGA-86-A4P8-01A-11R-A24X-07
## ENSG00000000003.15 3373 1157
## ENSG00000000005.6 0 2
## ENSG00000000419.13 1045 1235
## ENSG00000000457.14 662 805
## ENSG00000000460.17 386 208
## ENSG00000000938.13 1067 4548
## TCGA-67-3772-01A-01R-0946-07 TCGA-44-4112-01B-06R-A277-07
## ENSG00000000003.15 9732 591
## ENSG00000000005.6 20 20
## ENSG00000000419.13 2166 383
## ENSG00000000457.14 1000 871
## ENSG00000000460.17 345 462
## ENSG00000000938.13 855 204
## TCGA-44-4112-01A-01R-A278-07 TCGA-55-7994-01A-11R-2187-07
## ENSG00000000003.15 1309 3841
## ENSG00000000005.6 7 18
## ENSG00000000419.13 1123 1625
## ENSG00000000457.14 1064 1118
## ENSG00000000460.17 334 762
## ENSG00000000938.13 211 2424
## TCGA-49-4490-01A-21R-1858-07 TCGA-35-4123-01A-01R-1107-07
## ENSG00000000003.15 3760 7006
## ENSG00000000005.6 3 0
## ENSG00000000419.13 632 2860
## ENSG00000000457.14 351 983
## ENSG00000000460.17 106 883
## ENSG00000000938.13 239 968
## TCGA-05-4425-01A-01R-1755-07 TCGA-49-4490-11A-01R-1858-07
## ENSG00000000003.15 1539 1524
## ENSG00000000005.6 0 2
## ENSG00000000419.13 1282 1677
## ENSG00000000457.14 445 414
## ENSG00000000460.17 198 99
## ENSG00000000938.13 852 1392
## TCGA-55-8204-01A-11R-2241-07 TCGA-MP-A4T9-01A-11R-A24X-07
## ENSG00000000003.15 7254 7321
## ENSG00000000005.6 2 4
## ENSG00000000419.13 2428 1475
## ENSG00000000457.14 716 629
## ENSG00000000460.17 521 303
## ENSG00000000938.13 735 1770
## TCGA-78-7539-01A-11R-2066-07 TCGA-50-5933-11A-01R-1755-07
## ENSG00000000003.15 4165 1110
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1534 1192
## ENSG00000000457.14 1162 275
## ENSG00000000460.17 446 74
## ENSG00000000938.13 2130 3373
## TCGA-50-5933-01A-11R-1755-07 TCGA-MP-A4T8-01A-11R-A24X-07
## ENSG00000000003.15 3500 4768
## ENSG00000000005.6 17 1
## ENSG00000000419.13 1525 1494
## ENSG00000000457.14 467 719
## ENSG00000000460.17 420 426
## ENSG00000000938.13 1345 298
## TCGA-55-6970-11A-01R-1949-07 TCGA-55-8302-01A-11R-2326-07
## ENSG00000000003.15 783 3018
## ENSG00000000005.6 0 0
## ENSG00000000419.13 783 1159
## ENSG00000000457.14 380 444
## ENSG00000000460.17 72 323
## ENSG00000000938.13 2418 1040
## TCGA-44-A47A-01A-21R-A24H-07 TCGA-MP-A4TF-01A-11R-A262-07
## ENSG00000000003.15 1686 2421
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1064 1617
## ENSG00000000457.14 1167 814
## ENSG00000000460.17 329 415
## ENSG00000000938.13 1603 380
## TCGA-50-7109-01A-11R-2039-07 TCGA-78-7161-01A-11R-2039-07
## ENSG00000000003.15 2624 7079
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1964 1899
## ENSG00000000457.14 431 959
## ENSG00000000460.17 198 252
## ENSG00000000938.13 634 293
## TCGA-55-6971-01A-11R-1949-07 TCGA-86-A456-01A-11R-A24H-07
## ENSG00000000003.15 1369 1945
## ENSG00000000005.6 0 0
## ENSG00000000419.13 703 1268
## ENSG00000000457.14 479 441
## ENSG00000000460.17 140 119
## ENSG00000000938.13 1004 1445
## TCGA-44-6148-11A-01R-1858-07 TCGA-44-6148-01A-11R-1755-07
## ENSG00000000003.15 3135 4774
## ENSG00000000005.6 4 9
## ENSG00000000419.13 1152 974
## ENSG00000000457.14 725 838
## ENSG00000000460.17 166 149
## ENSG00000000938.13 3046 750
## TCGA-MP-A5C7-01A-11R-A262-07 TCGA-49-AAQV-01A-11R-A39D-07
## ENSG00000000003.15 3424 8559
## ENSG00000000005.6 1 2
## ENSG00000000419.13 1700 2325
## ENSG00000000457.14 2170 1365
## ENSG00000000460.17 457 671
## ENSG00000000938.13 263 775
## TCGA-75-6205-01A-11R-1755-07 TCGA-44-A47B-01A-11R-A24H-07
## ENSG00000000003.15 626 2370
## ENSG00000000005.6 0 8
## ENSG00000000419.13 576 1290
## ENSG00000000457.14 264 713
## ENSG00000000460.17 163 496
## ENSG00000000938.13 958 847
## TCGA-50-5049-01A-01R-1628-07 TCGA-55-5899-01A-11R-1628-07
## ENSG00000000003.15 1638 1146
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1563 1397
## ENSG00000000457.14 655 335
## ENSG00000000460.17 369 343
## ENSG00000000938.13 1734 509
## TCGA-50-5045-01A-01R-1628-07 TCGA-69-8453-01A-12R-2326-07
## ENSG00000000003.15 6341 1671
## ENSG00000000005.6 31 1
## ENSG00000000419.13 2299 1100
## ENSG00000000457.14 637 649
## ENSG00000000460.17 289 226
## ENSG00000000938.13 2541 5589
## TCGA-55-A491-01A-11R-A24H-07 TCGA-49-6743-11A-01R-1858-07
## ENSG00000000003.15 5849 1281
## ENSG00000000005.6 2 107
## ENSG00000000419.13 1227 1042
## ENSG00000000457.14 757 334
## ENSG00000000460.17 520 94
## ENSG00000000938.13 936 1647
## TCGA-49-6743-01A-11R-1858-07 TCGA-78-7150-01A-21R-2039-07
## ENSG00000000003.15 1085 3440
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1382 1478
## ENSG00000000457.14 610 625
## ENSG00000000460.17 599 749
## ENSG00000000938.13 543 227
## TCGA-95-A4VP-01A-21R-A262-07 TCGA-49-4512-01A-21R-1858-07
## ENSG00000000003.15 3648 1516
## ENSG00000000005.6 0 1
## ENSG00000000419.13 955 837
## ENSG00000000457.14 526 293
## ENSG00000000460.17 293 87
## ENSG00000000938.13 1147 695
## TCGA-86-7701-01A-11R-2170-07 TCGA-55-8513-01A-11R-2403-07
## ENSG00000000003.15 1086 1428
## ENSG00000000005.6 0 0
## ENSG00000000419.13 687 624
## ENSG00000000457.14 304 477
## ENSG00000000460.17 180 108
## ENSG00000000938.13 270 3715
## TCGA-55-8206-01A-11R-2241-07 TCGA-44-7660-01A-11R-2066-07
## ENSG00000000003.15 5487 6616
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1804 2332
## ENSG00000000457.14 1798 998
## ENSG00000000460.17 337 614
## ENSG00000000938.13 3921 734
## TCGA-95-7944-01A-11R-2187-07 TCGA-L9-A743-01A-43R-A39D-07
## ENSG00000000003.15 1566 2559
## ENSG00000000005.6 0 1
## ENSG00000000419.13 2666 1415
## ENSG00000000457.14 701 561
## ENSG00000000460.17 642 336
## ENSG00000000938.13 1327 1778
## TCGA-55-7995-01A-11R-2187-07 TCGA-97-7554-01A-11R-2039-07
## ENSG00000000003.15 4577 2777
## ENSG00000000005.6 0 11
## ENSG00000000419.13 2329 1205
## ENSG00000000457.14 1257 611
## ENSG00000000460.17 823 376
## ENSG00000000938.13 3079 763
## TCGA-50-6673-01A-11R-1949-07 TCGA-49-4514-01A-21R-1858-07
## ENSG00000000003.15 2904 2261
## ENSG00000000005.6 2 0
## ENSG00000000419.13 1071 1792
## ENSG00000000457.14 437 246
## ENSG00000000460.17 260 131
## ENSG00000000938.13 713 275
## TCGA-NJ-A55A-01A-11R-A262-07 TCGA-78-7160-01A-11R-2039-07
## ENSG00000000003.15 3009 1946
## ENSG00000000005.6 0 1
## ENSG00000000419.13 753 734
## ENSG00000000457.14 499 724
## ENSG00000000460.17 137 450
## ENSG00000000938.13 380 776
## TCGA-55-7727-01A-11R-2170-07 TCGA-44-2668-11A-01R-1758-07
## ENSG00000000003.15 1804 985
## ENSG00000000005.6 0 2
## ENSG00000000419.13 1118 1278
## ENSG00000000457.14 554 413
## ENSG00000000460.17 196 135
## ENSG00000000938.13 243 12378
## TCGA-62-A46R-01A-11R-A24H-07 TCGA-55-7907-01A-11R-2170-07
## ENSG00000000003.15 2723 1877
## ENSG00000000005.6 0 4
## ENSG00000000419.13 1637 920
## ENSG00000000457.14 805 417
## ENSG00000000460.17 358 279
## ENSG00000000938.13 1153 448
## TCGA-44-2662-01B-02R-A277-07 TCGA-44-6779-01A-11R-1858-07
## ENSG00000000003.15 667 1961
## ENSG00000000005.6 10 0
## ENSG00000000419.13 554 385
## ENSG00000000457.14 716 355
## ENSG00000000460.17 619 261
## ENSG00000000938.13 455 315
## TCGA-50-5055-01A-01R-1628-07 TCGA-67-6216-01A-11R-1755-07
## ENSG00000000003.15 2078 5010
## ENSG00000000005.6 5 917
## ENSG00000000419.13 1004 640
## ENSG00000000457.14 368 522
## ENSG00000000460.17 117 275
## ENSG00000000938.13 763 649
## TCGA-05-4415-01A-22R-1858-07 TCGA-86-7713-01A-11R-2066-07
## ENSG00000000003.15 2626 8522
## ENSG00000000005.6 0 0
## ENSG00000000419.13 865 2548
## ENSG00000000457.14 585 2384
## ENSG00000000460.17 730 1790
## ENSG00000000938.13 266 491
## TCGA-86-8073-01A-11R-2241-07 TCGA-55-6969-01A-11R-1949-07
## ENSG00000000003.15 3418 2233
## ENSG00000000005.6 0 0
## ENSG00000000419.13 2937 2803
## ENSG00000000457.14 1008 940
## ENSG00000000460.17 592 647
## ENSG00000000938.13 1926 1136
## TCGA-38-4627-01A-01R-1206-07 TCGA-MN-A4N4-01A-12R-A24X-07
## ENSG00000000003.15 10673 3365
## ENSG00000000005.6 3 0
## ENSG00000000419.13 2002 2244
## ENSG00000000457.14 605 777
## ENSG00000000460.17 335 451
## ENSG00000000938.13 1185 507
## TCGA-62-A46O-01A-11R-A24H-07 TCGA-86-8280-01A-11R-2287-07
## ENSG00000000003.15 5948 7090
## ENSG00000000005.6 0 7
## ENSG00000000419.13 1174 1599
## ENSG00000000457.14 628 1377
## ENSG00000000460.17 572 441
## ENSG00000000938.13 113 1778
## TCGA-67-6215-01A-11R-1755-07 TCGA-44-A47G-01A-21R-A24H-07
## ENSG00000000003.15 1975 2272
## ENSG00000000005.6 4 0
## ENSG00000000419.13 1197 851
## ENSG00000000457.14 1705 565
## ENSG00000000460.17 560 199
## ENSG00000000938.13 1658 2245
## TCGA-78-7536-01A-11R-2066-07 TCGA-44-5645-11A-01R-1628-07
## ENSG00000000003.15 4409 1294
## ENSG00000000005.6 0 3
## ENSG00000000419.13 3799 1108
## ENSG00000000457.14 1998 673
## ENSG00000000460.17 1683 114
## ENSG00000000938.13 1088 3578
## TCGA-91-6831-01A-11R-1858-07 TCGA-97-A4M0-01A-11R-A24X-07
## ENSG00000000003.15 1419 943
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1693 1236
## ENSG00000000457.14 541 1215
## ENSG00000000460.17 508 390
## ENSG00000000938.13 557 1161
## TCGA-44-5645-01B-04R-A277-07 TCGA-55-A493-01A-11R-A24H-07
## ENSG00000000003.15 1807 1841
## ENSG00000000005.6 41 19
## ENSG00000000419.13 620 1308
## ENSG00000000457.14 1368 445
## ENSG00000000460.17 695 414
## ENSG00000000938.13 227 1800
## TCGA-55-6985-01A-11R-1949-07 TCGA-55-6543-01A-11R-1755-07
## ENSG00000000003.15 3932 4662
## ENSG00000000005.6 0 1
## ENSG00000000419.13 980 1414
## ENSG00000000457.14 407 543
## ENSG00000000460.17 207 161
## ENSG00000000938.13 554 1236
## TCGA-50-5936-01A-11R-1628-07 TCGA-05-4432-01A-01R-1206-07
## ENSG00000000003.15 2532 3883
## ENSG00000000005.6 0 0
## ENSG00000000419.13 616 2952
## ENSG00000000457.14 487 1682
## ENSG00000000460.17 310 1267
## ENSG00000000938.13 517 1829
## TCGA-93-7348-01A-21R-2039-07 TCGA-55-6984-11A-01R-1949-07
## ENSG00000000003.15 2336 2056
## ENSG00000000005.6 1 5
## ENSG00000000419.13 1095 1573
## ENSG00000000457.14 715 774
## ENSG00000000460.17 286 154
## ENSG00000000938.13 412 3092
## TCGA-55-6984-01A-11R-1949-07 TCGA-78-7147-01A-11R-2039-07
## ENSG00000000003.15 843 8038
## ENSG00000000005.6 0 6
## ENSG00000000419.13 739 1763
## ENSG00000000457.14 653 1005
## ENSG00000000460.17 148 552
## ENSG00000000938.13 312 510
## TCGA-50-5941-01A-11R-1755-07 TCGA-4B-A93V-01A-11R-A39D-07
## ENSG00000000003.15 3341 5108
## ENSG00000000005.6 1 1
## ENSG00000000419.13 1340 1570
## ENSG00000000457.14 603 641
## ENSG00000000460.17 231 525
## ENSG00000000938.13 1189 577
## TCGA-91-A4BC-01A-11R-A24H-07 TCGA-78-7167-01A-11R-2066-07
## ENSG00000000003.15 1501 3739
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1483 2047
## ENSG00000000457.14 363 2240
## ENSG00000000460.17 286 525
## ENSG00000000938.13 560 355
## TCGA-95-7043-01A-11R-1949-07 TCGA-67-6217-01A-11R-1755-07
## ENSG00000000003.15 1475 2119
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1275 1026
## ENSG00000000457.14 373 752
## ENSG00000000460.17 172 189
## ENSG00000000938.13 81 539
## TCGA-55-7726-01A-11R-2170-07 TCGA-49-6745-11A-01R-1858-07
## ENSG00000000003.15 1188 2183
## ENSG00000000005.6 0 3
## ENSG00000000419.13 1564 1573
## ENSG00000000457.14 269 480
## ENSG00000000460.17 219 87
## ENSG00000000938.13 321 1621
## TCGA-49-6745-01A-11R-1858-07 TCGA-50-5932-11A-01R-1755-07
## ENSG00000000003.15 2687 622
## ENSG00000000005.6 3 1
## ENSG00000000419.13 1157 619
## ENSG00000000457.14 443 411
## ENSG00000000460.17 345 79
## ENSG00000000938.13 1059 1896
## TCGA-55-8619-01A-11R-2403-07 TCGA-78-7166-01A-12R-2066-07
## ENSG00000000003.15 1020 2966
## ENSG00000000005.6 1 0
## ENSG00000000419.13 831 1346
## ENSG00000000457.14 518 1050
## ENSG00000000460.17 132 376
## ENSG00000000938.13 2581 434
## TCGA-55-1596-01A-01R-0946-07 TCGA-73-4670-01A-01R-1206-07
## ENSG00000000003.15 6067 4513
## ENSG00000000005.6 0 0
## ENSG00000000419.13 2126 4348
## ENSG00000000457.14 677 1080
## ENSG00000000460.17 651 1161
## ENSG00000000938.13 362 904
## TCGA-05-5420-01A-01R-1628-07 TCGA-95-7562-01A-11R-2241-07
## ENSG00000000003.15 3641 9980
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1804 2705
## ENSG00000000457.14 687 1459
## ENSG00000000460.17 338 1699
## ENSG00000000938.13 1533 884
## TCGA-95-7947-01A-11R-2187-07 TCGA-55-8090-01A-11R-2241-07
## ENSG00000000003.15 5315 3866
## ENSG00000000005.6 9 0
## ENSG00000000419.13 3541 1516
## ENSG00000000457.14 1756 779
## ENSG00000000460.17 918 218
## ENSG00000000938.13 969 1468
## TCGA-73-7498-01A-12R-2187-07 TCGA-44-6146-01A-11R-A278-07
## ENSG00000000003.15 5042 791
## ENSG00000000005.6 4 0
## ENSG00000000419.13 1966 881
## ENSG00000000457.14 1920 639
## ENSG00000000460.17 410 98
## ENSG00000000938.13 1404 77
## TCGA-44-6146-01A-11R-1755-07 TCGA-91-7771-01A-11R-2170-07
## ENSG00000000003.15 1430 3663
## ENSG00000000005.6 0 3
## ENSG00000000419.13 1562 2084
## ENSG00000000457.14 998 640
## ENSG00000000460.17 157 228
## ENSG00000000938.13 261 1200
## TCGA-44-7670-01A-11R-2066-07 TCGA-49-4501-01A-01R-1206-07
## ENSG00000000003.15 6072 8645
## ENSG00000000005.6 0 8
## ENSG00000000419.13 4411 3076
## ENSG00000000457.14 593 1623
## ENSG00000000460.17 748 517
## ENSG00000000938.13 488 1273
## TCGA-05-4427-01A-21R-1858-07 TCGA-MP-A4T4-01A-11R-A262-07
## ENSG00000000003.15 1413 2165
## ENSG00000000005.6 86 0
## ENSG00000000419.13 1452 1587
## ENSG00000000457.14 375 747
## ENSG00000000460.17 387 370
## ENSG00000000938.13 886 1634
## TCGA-44-7671-01A-11R-2066-07 TCGA-78-8660-01A-11R-2403-07
## ENSG00000000003.15 3794 1486
## ENSG00000000005.6 7 2
## ENSG00000000419.13 1695 1226
## ENSG00000000457.14 1007 465
## ENSG00000000460.17 295 441
## ENSG00000000938.13 408 1223
## TCGA-55-6978-11A-01R-1949-07 TCGA-05-4244-01A-01R-1107-07
## ENSG00000000003.15 1500 5001
## ENSG00000000005.6 3 0
## ENSG00000000419.13 869 1452
## ENSG00000000457.14 367 1308
## ENSG00000000460.17 89 789
## ENSG00000000938.13 2130 1963
## TCGA-J2-8192-01A-11R-2241-07 TCGA-L4-A4E5-01A-11R-A24X-07
## ENSG00000000003.15 9590 1501
## ENSG00000000005.6 3 0
## ENSG00000000419.13 2299 1769
## ENSG00000000457.14 1423 790
## ENSG00000000460.17 419 330
## ENSG00000000938.13 2024 348
## TCGA-86-8055-01A-11R-2241-07 TCGA-55-A48X-01A-11R-A24H-07
## ENSG00000000003.15 8641 3709
## ENSG00000000005.6 3 0
## ENSG00000000419.13 2135 1283
## ENSG00000000457.14 1175 1022
## ENSG00000000460.17 549 452
## ENSG00000000938.13 1499 830
## TCGA-71-8520-01A-11R-2403-07 TCGA-55-8096-01A-11R-2241-07
## ENSG00000000003.15 2582 8028
## ENSG00000000005.6 0 3
## ENSG00000000419.13 878 2142
## ENSG00000000457.14 571 1352
## ENSG00000000460.17 424 622
## ENSG00000000938.13 324 2641
## TCGA-50-5930-11A-01R-1755-07 TCGA-97-8171-01A-11R-2287-07
## ENSG00000000003.15 459 3729
## ENSG00000000005.6 0 0
## ENSG00000000419.13 725 1589
## ENSG00000000457.14 372 2702
## ENSG00000000460.17 74 631
## ENSG00000000938.13 5527 468
## TCGA-55-A492-01A-11R-A24H-07 TCGA-95-7567-01A-11R-2066-07
## ENSG00000000003.15 3035 2546
## ENSG00000000005.6 1 108
## ENSG00000000419.13 1344 3384
## ENSG00000000457.14 1052 1409
## ENSG00000000460.17 157 908
## ENSG00000000938.13 382 807
## TCGA-44-3396-01A-01R-1206-07 TCGA-69-7974-01A-11R-2187-07
## ENSG00000000003.15 9058 3169
## ENSG00000000005.6 3 47
## ENSG00000000419.13 4525 2278
## ENSG00000000457.14 977 943
## ENSG00000000460.17 870 585
## ENSG00000000938.13 4621 2660
## TCGA-44-6775-01C-02R-A277-07 TCGA-44-6775-01A-11R-1858-07
## ENSG00000000003.15 1218 3446
## ENSG00000000005.6 10 2
## ENSG00000000419.13 558 1469
## ENSG00000000457.14 830 706
## ENSG00000000460.17 463 415
## ENSG00000000938.13 305 1332
## TCGA-44-6775-01A-11R-A278-07 TCGA-44-2656-01A-02R-A278-07
## ENSG00000000003.15 2487 4199
## ENSG00000000005.6 2 5
## ENSG00000000419.13 1141 2074
## ENSG00000000457.14 845 1952
## ENSG00000000460.17 413 666
## ENSG00000000938.13 742 878
## TCGA-NJ-A4YP-01A-11R-A262-07 TCGA-44-2656-01B-06R-A277-07
## ENSG00000000003.15 4154 1196
## ENSG00000000005.6 20 4
## ENSG00000000419.13 3191 359
## ENSG00000000457.14 2074 621
## ENSG00000000460.17 1090 373
## ENSG00000000938.13 1082 328
## TCGA-53-7813-01A-11R-2170-07 TCGA-35-4122-01A-01R-1107-07
## ENSG00000000003.15 1952 10798
## ENSG00000000005.6 0 36
## ENSG00000000419.13 956 3551
## ENSG00000000457.14 573 542
## ENSG00000000460.17 195 1115
## ENSG00000000938.13 175 1698
## TCGA-86-8056-01A-11R-2241-07 TCGA-78-7159-01A-11R-2039-07
## ENSG00000000003.15 2472 4288
## ENSG00000000005.6 2 1
## ENSG00000000419.13 2471 1335
## ENSG00000000457.14 755 637
## ENSG00000000460.17 209 539
## ENSG00000000938.13 1891 565
## TCGA-91-8497-01A-11R-2403-07 TCGA-86-8074-01A-11R-2241-07
## ENSG00000000003.15 769 8673
## ENSG00000000005.6 7 1
## ENSG00000000419.13 725 3153
## ENSG00000000457.14 536 1713
## ENSG00000000460.17 109 1067
## ENSG00000000938.13 1204 1226
## TCGA-05-4249-01A-01R-1107-07 TCGA-78-7145-01A-11R-2039-07
## ENSG00000000003.15 4383 3519
## ENSG00000000005.6 0 3
## ENSG00000000419.13 2006 1422
## ENSG00000000457.14 1632 490
## ENSG00000000460.17 482 459
## ENSG00000000938.13 1209 698
## TCGA-78-7158-01A-11R-2039-07 TCGA-71-6725-01A-11R-1858-07
## ENSG00000000003.15 2778 4597
## ENSG00000000005.6 3 0
## ENSG00000000419.13 1145 2710
## ENSG00000000457.14 4417 1064
## ENSG00000000460.17 421 326
## ENSG00000000938.13 390 1230
## TCGA-86-7714-01A-12R-2170-07 TCGA-55-6983-01A-11R-1949-07
## ENSG00000000003.15 2223 4826
## ENSG00000000005.6 2 0
## ENSG00000000419.13 711 1471
## ENSG00000000457.14 397 1135
## ENSG00000000460.17 155 334
## ENSG00000000938.13 329 1036
## TCGA-78-7163-01A-12R-2066-07 TCGA-62-A46Y-01A-11R-A24H-07
## ENSG00000000003.15 9492 5460
## ENSG00000000005.6 1 1
## ENSG00000000419.13 2524 1682
## ENSG00000000457.14 741 855
## ENSG00000000460.17 225 253
## ENSG00000000938.13 110 1256
## TCGA-35-5375-01A-01R-1628-07 TCGA-97-A4M5-01A-11R-A24X-07
## ENSG00000000003.15 3715 4006
## ENSG00000000005.6 0 3
## ENSG00000000419.13 2468 1433
## ENSG00000000457.14 249 820
## ENSG00000000460.17 395 258
## ENSG00000000938.13 150 2105
## TCGA-97-7547-01A-11R-2039-07 TCGA-44-6778-01A-11R-1858-07
## ENSG00000000003.15 7284 886
## ENSG00000000005.6 2 0
## ENSG00000000419.13 1092 988
## ENSG00000000457.14 1218 601
## ENSG00000000460.17 176 299
## ENSG00000000938.13 534 1954
## TCGA-86-8281-01A-11R-2287-07 TCGA-49-4486-01A-01R-1206-07
## ENSG00000000003.15 8297 4586
## ENSG00000000005.6 1 0
## ENSG00000000419.13 1960 2629
## ENSG00000000457.14 2100 1512
## ENSG00000000460.17 543 311
## ENSG00000000938.13 519 266
## TCGA-62-8399-01A-21R-2326-07 TCGA-44-8117-01A-11R-2241-07
## ENSG00000000003.15 3014 2255
## ENSG00000000005.6 0 0
## ENSG00000000419.13 2149 3896
## ENSG00000000457.14 843 2317
## ENSG00000000460.17 663 1258
## ENSG00000000938.13 1084 756
## TCGA-44-4112-01A-01R-1107-07 TCGA-05-4397-01A-01R-1206-07
## ENSG00000000003.15 3014 7805
## ENSG00000000005.6 7 19
## ENSG00000000419.13 1924 9358
## ENSG00000000457.14 1208 1789
## ENSG00000000460.17 506 2743
## ENSG00000000938.13 596 1775
## TCGA-55-8094-01A-11R-2241-07 TCGA-44-3918-01B-02R-A277-07
## ENSG00000000003.15 3719 848
## ENSG00000000005.6 0 23
## ENSG00000000419.13 2174 552
## ENSG00000000457.14 677 900
## ENSG00000000460.17 386 562
## ENSG00000000938.13 117 286
## TCGA-49-6767-01A-11R-1858-07 TCGA-44-3918-01A-01R-1107-07
## ENSG00000000003.15 9713 5980
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1640 3549
## ENSG00000000457.14 382 1297
## ENSG00000000460.17 475 673
## ENSG00000000938.13 648 1529
## TCGA-44-3918-01A-01R-A278-07 TCGA-93-A4JQ-01A-11R-A24X-07
## ENSG00000000003.15 4233 1305
## ENSG00000000005.6 10 12
## ENSG00000000419.13 3306 1352
## ENSG00000000457.14 1631 915
## ENSG00000000460.17 842 621
## ENSG00000000938.13 403 1419
## TCGA-62-8394-01A-11R-2326-07 TCGA-62-A46U-01A-11R-A24H-07
## ENSG00000000003.15 6428 1168
## ENSG00000000005.6 4 0
## ENSG00000000419.13 1522 1460
## ENSG00000000457.14 594 681
## ENSG00000000460.17 767 363
## ENSG00000000938.13 533 6236
## TCGA-69-7761-01A-11R-2170-07 TCGA-MP-A4TH-01A-31R-A262-07
## ENSG00000000003.15 1252 1093
## ENSG00000000005.6 2 0
## ENSG00000000419.13 1554 752
## ENSG00000000457.14 1259 1113
## ENSG00000000460.17 484 213
## ENSG00000000938.13 1193 1106
## TCGA-86-8669-01A-11R-2403-07 TCGA-44-A479-01A-31R-A24H-07
## ENSG00000000003.15 2203 1889
## ENSG00000000005.6 0 0
## ENSG00000000419.13 889 1941
## ENSG00000000457.14 535 548
## ENSG00000000460.17 188 301
## ENSG00000000938.13 632 879
## TCGA-64-1679-01A-21R-2066-07 TCGA-50-6595-11A-01R-1858-07
## ENSG00000000003.15 5817 1546
## ENSG00000000005.6 1 1
## ENSG00000000419.13 1664 1563
## ENSG00000000457.14 754 975
## ENSG00000000460.17 605 145
## ENSG00000000938.13 1174 4125
## TCGA-99-8032-01A-11R-2241-07 TCGA-55-6968-01A-11R-1949-07
## ENSG00000000003.15 2721 1194
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1993 1812
## ENSG00000000457.14 824 489
## ENSG00000000460.17 461 617
## ENSG00000000938.13 658 951
## TCGA-44-8120-01A-11R-2241-07 TCGA-97-8174-01A-11R-2287-07
## ENSG00000000003.15 7846 4413
## ENSG00000000005.6 0 1
## ENSG00000000419.13 2104 1726
## ENSG00000000457.14 922 1895
## ENSG00000000460.17 268 411
## ENSG00000000938.13 1152 1527
## TCGA-38-4632-11A-01R-1755-07 TCGA-38-4632-01A-01R-1755-07
## ENSG00000000003.15 764 3401
## ENSG00000000005.6 1 0
## ENSG00000000419.13 1088 1626
## ENSG00000000457.14 265 405
## ENSG00000000460.17 74 474
## ENSG00000000938.13 2501 685
## TCGA-MP-A4SY-01A-21R-A24X-07 TCGA-95-8039-01A-11R-2241-07
## ENSG00000000003.15 3991 5552
## ENSG00000000005.6 2 4
## ENSG00000000419.13 2162 2216
## ENSG00000000457.14 703 1103
## ENSG00000000460.17 492 425
## ENSG00000000938.13 755 1684
## TCGA-L9-A8F4-01A-11R-A39D-07 TCGA-55-6971-11A-01R-1949-07
## ENSG00000000003.15 2714 706
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1785 811
## ENSG00000000457.14 711 635
## ENSG00000000460.17 571 104
## ENSG00000000938.13 1693 2901
## TCGA-86-8075-01A-11R-2241-07 TCGA-69-7760-01A-11R-2170-07
## ENSG00000000003.15 11426 12460
## ENSG00000000005.6 3 8122
## ENSG00000000419.13 2428 1295
## ENSG00000000457.14 1236 575
## ENSG00000000460.17 888 309
## ENSG00000000938.13 1375 143
## TCGA-75-5126-01A-01R-1755-07 TCGA-91-6849-01A-11R-1949-07
## ENSG00000000003.15 2050 1853
## ENSG00000000005.6 4 1
## ENSG00000000419.13 1410 675
## ENSG00000000457.14 254 409
## ENSG00000000460.17 203 93
## ENSG00000000938.13 1652 1364
## TCGA-86-7711-01A-11R-2066-07 TCGA-05-4417-01A-22R-1858-07
## ENSG00000000003.15 4241 2292
## ENSG00000000005.6 0 3
## ENSG00000000419.13 2643 654
## ENSG00000000457.14 950 460
## ENSG00000000460.17 1010 165
## ENSG00000000938.13 1615 658
## TCGA-97-8552-01A-11R-2403-07 TCGA-93-A4JN-01A-11R-A24X-07
## ENSG00000000003.15 4123 3860
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1390 2367
## ENSG00000000457.14 568 1158
## ENSG00000000460.17 116 1084
## ENSG00000000938.13 1770 937
## TCGA-05-4389-01A-01R-1206-07 TCGA-64-5815-01A-01R-1628-07
## ENSG00000000003.15 9134 1139
## ENSG00000000005.6 1 8
## ENSG00000000419.13 2215 552
## ENSG00000000457.14 1106 234
## ENSG00000000460.17 770 127
## ENSG00000000938.13 1592 839
## TCGA-49-4512-11A-01R-1858-07 TCGA-L9-A50W-01A-12R-A39D-07
## ENSG00000000003.15 1509 3010
## ENSG00000000005.6 2 0
## ENSG00000000419.13 1306 1343
## ENSG00000000457.14 361 956
## ENSG00000000460.17 59 175
## ENSG00000000938.13 1744 675
## TCGA-86-8278-01A-11R-2287-07 TCGA-75-6203-01A-11R-1755-07
## ENSG00000000003.15 8232 1786
## ENSG00000000005.6 624 0
## ENSG00000000419.13 2642 737
## ENSG00000000457.14 1369 435
## ENSG00000000460.17 511 118
## ENSG00000000938.13 1157 2878
## TCGA-78-8655-01A-11R-2403-07 TCGA-97-8547-01A-11R-2403-07
## ENSG00000000003.15 3347 1482
## ENSG00000000005.6 0 126
## ENSG00000000419.13 899 506
## ENSG00000000457.14 824 313
## ENSG00000000460.17 245 125
## ENSG00000000938.13 835 509
## TCGA-55-8097-01A-11R-2241-07 TCGA-73-4668-01A-01R-1206-07
## ENSG00000000003.15 2818 8875
## ENSG00000000005.6 1 0
## ENSG00000000419.13 1328 3908
## ENSG00000000457.14 1262 805
## ENSG00000000460.17 270 924
## ENSG00000000938.13 1200 1227
## TCGA-55-7910-01A-11R-2170-07 TCGA-44-2668-01A-01R-A278-07
## ENSG00000000003.15 923 1092
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1785 1202
## ENSG00000000457.14 392 514
## ENSG00000000460.17 289 356
## ENSG00000000938.13 282 775
## TCGA-55-8301-01A-11R-2287-07 TCGA-55-A490-01A-11R-A466-07
## ENSG00000000003.15 2121 735
## ENSG00000000005.6 2 0
## ENSG00000000419.13 1609 705
## ENSG00000000457.14 1181 478
## ENSG00000000460.17 580 112
## ENSG00000000938.13 2021 1499
## TCGA-78-7152-01A-11R-2039-07 TCGA-86-8671-01A-11R-2403-07
## ENSG00000000003.15 1225 1811
## ENSG00000000005.6 1 1
## ENSG00000000419.13 340 865
## ENSG00000000457.14 347 788
## ENSG00000000460.17 136 195
## ENSG00000000938.13 398 2406
## TCGA-55-6972-11A-01R-1949-07 TCGA-55-7913-01B-11R-2241-07
## ENSG00000000003.15 573 7249
## ENSG00000000005.6 1 0
## ENSG00000000419.13 619 3144
## ENSG00000000457.14 370 1768
## ENSG00000000460.17 68 1359
## ENSG00000000938.13 2599 798
## TCGA-73-4659-01A-01R-1206-07 TCGA-75-5147-01A-01R-1628-07
## ENSG00000000003.15 5100 15402
## ENSG00000000005.6 1 20
## ENSG00000000419.13 2394 3402
## ENSG00000000457.14 939 1474
## ENSG00000000460.17 673 863
## ENSG00000000938.13 2717 1484
## TCGA-44-2662-11A-01R-1758-07 TCGA-05-4403-01A-01R-1206-07
## ENSG00000000003.15 1120 3759
## ENSG00000000005.6 7 3
## ENSG00000000419.13 980 2482
## ENSG00000000457.14 343 1322
## ENSG00000000460.17 72 229
## ENSG00000000938.13 4442 3772
## TCGA-91-6831-11A-02R-1858-07 TCGA-38-4627-11A-01R-1758-07
## ENSG00000000003.15 348 1229
## ENSG00000000005.6 0 1
## ENSG00000000419.13 752 1703
## ENSG00000000457.14 211 345
## ENSG00000000460.17 38 122
## ENSG00000000938.13 1506 3772
## TCGA-53-7626-01A-12R-2066-07 TCGA-55-7728-01A-11R-2187-07
## ENSG00000000003.15 3249 1998
## ENSG00000000005.6 0 2
## ENSG00000000419.13 1969 909
## ENSG00000000457.14 1345 1281
## ENSG00000000460.17 427 191
## ENSG00000000938.13 2713 11350
## TCGA-91-6840-01A-11R-1949-07 TCGA-55-8507-01A-11R-2403-07
## ENSG00000000003.15 4820 2585
## ENSG00000000005.6 3 0
## ENSG00000000419.13 1405 1542
## ENSG00000000457.14 459 664
## ENSG00000000460.17 398 454
## ENSG00000000938.13 408 895
## TCGA-69-7973-01A-11R-2187-07 TCGA-44-5645-01A-01R-A278-07
## ENSG00000000003.15 4156 7077
## ENSG00000000005.6 0 8
## ENSG00000000419.13 2481 1657
## ENSG00000000457.14 1271 1611
## ENSG00000000460.17 1147 405
## ENSG00000000938.13 547 495
## TCGA-62-A46S-01A-11R-A24H-07 TCGA-44-5645-01A-01R-1628-07
## ENSG00000000003.15 5963 5638
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1075 1208
## ENSG00000000457.14 979 883
## ENSG00000000460.17 222 211
## ENSG00000000938.13 856 736
## TCGA-86-A4JF-01A-11R-A24X-07 TCGA-55-8087-01A-11R-2241-07
## ENSG00000000003.15 1213 4201
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1359 1793
## ENSG00000000457.14 805 2799
## ENSG00000000460.17 667 432
## ENSG00000000938.13 1226 1042
## TCGA-05-5715-01A-01R-1628-07 TCGA-50-6593-01A-11R-1755-07
## ENSG00000000003.15 3190 5471
## ENSG00000000005.6 0 0
## ENSG00000000419.13 496 617
## ENSG00000000457.14 555 390
## ENSG00000000460.17 243 223
## ENSG00000000938.13 994 1079
## TCGA-97-A4LX-01A-11R-A24X-07 TCGA-44-2666-01A-01R-0946-07
## ENSG00000000003.15 2279 2692
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1583 2287
## ENSG00000000457.14 804 1122
## ENSG00000000460.17 303 449
## ENSG00000000938.13 2305 647
## TCGA-44-2666-01A-01R-A278-07 TCGA-55-6985-11A-01R-1949-07
## ENSG00000000003.15 1768 2794
## ENSG00000000005.6 2 0
## ENSG00000000419.13 1703 1659
## ENSG00000000457.14 997 905
## ENSG00000000460.17 381 204
## ENSG00000000938.13 248 7083
## TCGA-86-8668-01A-11R-2403-07 TCGA-69-7978-01A-11R-2187-07
## ENSG00000000003.15 1370 3153
## ENSG00000000005.6 0 3
## ENSG00000000419.13 614 2063
## ENSG00000000457.14 408 683
## ENSG00000000460.17 130 375
## ENSG00000000938.13 510 2700
## TCGA-49-AAR2-01A-11R-A39D-07 TCGA-44-A4SS-01A-11R-A24X-07
## ENSG00000000003.15 2620 3623
## ENSG00000000005.6 0 0
## ENSG00000000419.13 587 1388
## ENSG00000000457.14 369 688
## ENSG00000000460.17 166 452
## ENSG00000000938.13 575 1312
## TCGA-78-7146-01A-11R-2039-07 TCGA-44-3398-01A-01R-1107-07
## ENSG00000000003.15 3428 8235
## ENSG00000000005.6 1 129
## ENSG00000000419.13 1453 2969
## ENSG00000000457.14 497 954
## ENSG00000000460.17 753 434
## ENSG00000000938.13 521 1646
## TCGA-44-3398-11B-01R-1758-07 TCGA-64-5774-01A-01R-1628-07
## ENSG00000000003.15 1988 4884
## ENSG00000000005.6 6 0
## ENSG00000000419.13 1520 1736
## ENSG00000000457.14 815 647
## ENSG00000000460.17 170 543
## ENSG00000000938.13 6303 245
## TCGA-55-8616-01A-11R-2403-07 TCGA-55-7570-01A-11R-2039-07
## ENSG00000000003.15 2718 2325
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1165 2313
## ENSG00000000457.14 639 776
## ENSG00000000460.17 293 1012
## ENSG00000000938.13 525 296
## TCGA-38-7271-01A-11R-2039-07 TCGA-55-7914-01A-11R-2170-07
## ENSG00000000003.15 1536 2936
## ENSG00000000005.6 0 0
## ENSG00000000419.13 765 1447
## ENSG00000000457.14 510 827
## ENSG00000000460.17 172 241
## ENSG00000000938.13 911 343
## TCGA-55-1595-01A-01R-0946-07 TCGA-49-6742-01A-11R-1858-07
## ENSG00000000003.15 2784 3257
## ENSG00000000005.6 4 0
## ENSG00000000419.13 2321 918
## ENSG00000000457.14 1529 706
## ENSG00000000460.17 637 219
## ENSG00000000938.13 1087 283
## TCGA-49-6742-11A-01R-1858-07 TCGA-38-4630-01A-01R-1206-07
## ENSG00000000003.15 1688 13245
## ENSG00000000005.6 4 77
## ENSG00000000419.13 982 3874
## ENSG00000000457.14 413 2285
## ENSG00000000460.17 85 2616
## ENSG00000000938.13 2118 657
## TCGA-44-2661-01A-01R-1107-07 TCGA-55-1592-01A-01R-0946-07
## ENSG00000000003.15 4558 8260
## ENSG00000000005.6 0 6
## ENSG00000000419.13 2089 3235
## ENSG00000000457.14 1025 1587
## ENSG00000000460.17 315 779
## ENSG00000000938.13 1760 2943
## TCGA-50-5932-01A-11R-1755-07 TCGA-55-1594-01A-01R-0946-07
## ENSG00000000003.15 4545 10056
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1557 6290
## ENSG00000000457.14 1185 672
## ENSG00000000460.17 322 768
## ENSG00000000938.13 424 633
## TCGA-44-7661-01A-11R-2066-07 TCGA-44-6147-11A-01R-1858-07
## ENSG00000000003.15 2138 849
## ENSG00000000005.6 4 1
## ENSG00000000419.13 2179 1237
## ENSG00000000457.14 830 290
## ENSG00000000460.17 610 95
## ENSG00000000938.13 2447 4712
## TCGA-44-6147-01A-11R-1755-07 TCGA-05-4384-01A-01R-1755-07
## ENSG00000000003.15 2834 1343
## ENSG00000000005.6 2 0
## ENSG00000000419.13 1307 907
## ENSG00000000457.14 715 823
## ENSG00000000460.17 285 189
## ENSG00000000938.13 1045 1047
## TCGA-44-6147-01A-11R-A278-07 TCGA-80-5607-01A-31R-1949-07
## ENSG00000000003.15 2178 1901
## ENSG00000000005.6 4 6
## ENSG00000000419.13 1430 961
## ENSG00000000457.14 1139 225
## ENSG00000000460.17 434 169
## ENSG00000000938.13 606 447
## TCGA-50-5072-01A-21R-1858-07 TCGA-05-4396-01A-21R-1858-07
## ENSG00000000003.15 2752 1107
## ENSG00000000005.6 1 0
## ENSG00000000419.13 1162 776
## ENSG00000000457.14 1088 495
## ENSG00000000460.17 888 115
## ENSG00000000938.13 692 135
## TCGA-67-3770-01A-01R-0946-07 TCGA-73-4676-01A-01R-1755-07
## ENSG00000000003.15 3661 4702
## ENSG00000000005.6 18 0
## ENSG00000000419.13 662 1571
## ENSG00000000457.14 436 432
## ENSG00000000460.17 96 488
## ENSG00000000938.13 471 738
## TCGA-50-5939-01A-11R-1628-07 TCGA-91-6836-11A-01R-1858-07
## ENSG00000000003.15 1588 743
## ENSG00000000005.6 1 0
## ENSG00000000419.13 816 739
## ENSG00000000457.14 359 405
## ENSG00000000460.17 193 60
## ENSG00000000938.13 783 3111
## TCGA-91-6836-01A-21R-1858-07 TCGA-05-4434-01A-01R-1206-07
## ENSG00000000003.15 1538 3342
## ENSG00000000005.6 2 0
## ENSG00000000419.13 2094 2713
## ENSG00000000457.14 403 1920
## ENSG00000000460.17 415 953
## ENSG00000000938.13 433 4108
## TCGA-55-8621-01A-11R-2403-07 TCGA-05-4402-01A-01R-1206-07
## ENSG00000000003.15 1567 12949
## ENSG00000000005.6 1 6
## ENSG00000000419.13 1007 2197
## ENSG00000000457.14 541 1530
## ENSG00000000460.17 203 701
## ENSG00000000938.13 2945 1531
## TCGA-44-3917-01A-01R-A278-07 TCGA-73-4677-01A-01R-1206-07
## ENSG00000000003.15 2436 4473
## ENSG00000000005.6 2 0
## ENSG00000000419.13 1918 2253
## ENSG00000000457.14 895 1465
## ENSG00000000460.17 780 348
## ENSG00000000938.13 126 1150
## TCGA-67-3773-01A-01R-0946-07 TCGA-91-6848-01A-11R-1949-07
## ENSG00000000003.15 848 1028
## ENSG00000000005.6 1 1
## ENSG00000000419.13 797 997
## ENSG00000000457.14 446 216
## ENSG00000000460.17 95 302
## ENSG00000000938.13 370 991
## TCGA-50-6592-01A-11R-1755-07 TCGA-86-7954-01A-11R-2187-07
## ENSG00000000003.15 3334 9079
## ENSG00000000005.6 1 3
## ENSG00000000419.13 2504 3519
## ENSG00000000457.14 657 1630
## ENSG00000000460.17 621 1097
## ENSG00000000938.13 1294 1745
## TCGA-55-6981-01A-11R-1949-07 TCGA-86-8054-01A-11R-2241-07
## ENSG00000000003.15 2982 8681
## ENSG00000000005.6 0 0
## ENSG00000000419.13 657 3598
## ENSG00000000457.14 341 1030
## ENSG00000000460.17 179 1492
## ENSG00000000938.13 340 544
## TCGA-J2-A4AD-01A-11R-A24H-07 TCGA-62-A46P-01A-11R-A24H-07
## ENSG00000000003.15 3719 1766
## ENSG00000000005.6 2 0
## ENSG00000000419.13 2154 1661
## ENSG00000000457.14 1319 475
## ENSG00000000460.17 563 95
## ENSG00000000938.13 357 368
## TCGA-86-7955-01A-11R-2187-07 TCGA-50-6590-01A-12R-1858-07
## ENSG00000000003.15 4279 544
## ENSG00000000005.6 0 0
## ENSG00000000419.13 3773 1174
## ENSG00000000457.14 757 309
## ENSG00000000460.17 685 382
## ENSG00000000938.13 236 1052
## TCGA-97-8179-01A-11R-2287-07 TCGA-91-A4BD-01A-11R-A24H-07
## ENSG00000000003.15 6146 1443
## ENSG00000000005.6 5 0
## ENSG00000000419.13 2211 1275
## ENSG00000000457.14 1158 230
## ENSG00000000460.17 632 104
## ENSG00000000938.13 720 930
## TCGA-50-5931-11A-01R-1858-07 TCGA-78-7162-01A-21R-2066-07
## ENSG00000000003.15 577 2960
## ENSG00000000005.6 1 1
## ENSG00000000419.13 941 1384
## ENSG00000000457.14 275 1039
## ENSG00000000460.17 36 204
## ENSG00000000938.13 2662 935
## TCGA-50-5931-01A-11R-1755-07 TCGA-78-8662-01A-11R-2403-07
## ENSG00000000003.15 1584 2347
## ENSG00000000005.6 2 2
## ENSG00000000419.13 1594 603
## ENSG00000000457.14 835 469
## ENSG00000000460.17 510 301
## ENSG00000000938.13 114 240
## TCGA-75-5125-01A-01R-1755-07 TCGA-55-8512-01A-11R-2403-07
## ENSG00000000003.15 3214 3334
## ENSG00000000005.6 18 8
## ENSG00000000419.13 1552 1317
## ENSG00000000457.14 414 644
## ENSG00000000460.17 316 120
## ENSG00000000938.13 1211 1241
## TCGA-73-A9RS-01A-11R-A41B-07 TCGA-69-7764-01A-11R-2170-07
## ENSG00000000003.15 583 857
## ENSG00000000005.6 1 0
## ENSG00000000419.13 1436 1000
## ENSG00000000457.14 482 863
## ENSG00000000460.17 278 346
## ENSG00000000938.13 248 236
## TCGA-L9-A5IP-01A-21R-A39D-07 TCGA-55-6987-01A-11R-1949-07
## ENSG00000000003.15 1113 4509
## ENSG00000000005.6 0 3
## ENSG00000000419.13 1180 2182
## ENSG00000000457.14 982 1523
## ENSG00000000460.17 642 819
## ENSG00000000938.13 361 3999
## TCGA-38-A44F-01A-11R-A24H-07 TCGA-NJ-A4YI-01A-11R-A262-07
## ENSG00000000003.15 2450 662
## ENSG00000000005.6 0 107
## ENSG00000000419.13 1360 919
## ENSG00000000457.14 591 557
## ENSG00000000460.17 119 193
## ENSG00000000938.13 1854 843
## TCGA-62-8398-01A-11R-2326-07 TCGA-55-6978-01A-11R-1949-07
## ENSG00000000003.15 4410 4422
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1630 2578
## ENSG00000000457.14 940 794
## ENSG00000000460.17 763 879
## ENSG00000000938.13 907 1896
## TCGA-93-A4JP-01A-11R-A24X-07 TCGA-75-6214-01A-41R-1949-07
## ENSG00000000003.15 4887 4375
## ENSG00000000005.6 5 2
## ENSG00000000419.13 1677 4095
## ENSG00000000457.14 982 934
## ENSG00000000460.17 509 673
## ENSG00000000938.13 2570 425
## TCGA-S2-AA1A-01A-12R-A39D-07 TCGA-44-6777-11A-01R-1858-07
## ENSG00000000003.15 4735 689
## ENSG00000000005.6 2 2
## ENSG00000000419.13 1031 653
## ENSG00000000457.14 765 269
## ENSG00000000460.17 152 57
## ENSG00000000938.13 783 2924
## TCGA-44-6774-01A-21R-1858-07 TCGA-55-6982-01A-11R-1949-07
## ENSG00000000003.15 1403 2291
## ENSG00000000005.6 1 2
## ENSG00000000419.13 662 586
## ENSG00000000457.14 256 384
## ENSG00000000460.17 158 206
## ENSG00000000938.13 372 373
## TCGA-05-5423-01A-01R-1628-07 TCGA-55-A57B-01A-12R-A39D-07
## ENSG00000000003.15 4477 5052
## ENSG00000000005.6 12 7
## ENSG00000000419.13 2813 1288
## ENSG00000000457.14 1014 1049
## ENSG00000000460.17 525 320
## ENSG00000000938.13 742 1272
## TCGA-L9-A443-01A-12R-A24H-07 TCGA-05-4430-01A-02R-1206-07
## ENSG00000000003.15 2101 6642
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1078 2471
## ENSG00000000457.14 750 806
## ENSG00000000460.17 237 495
## ENSG00000000938.13 484 1613
## TCGA-55-A4DG-01A-11R-A24H-07 TCGA-55-8506-01A-11R-2403-07
## ENSG00000000003.15 788 1597
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1386 1124
## ENSG00000000457.14 1305 538
## ENSG00000000460.17 228 292
## ENSG00000000938.13 492 531
## TCGA-05-5428-01A-01R-1628-07 TCGA-44-7672-01A-11R-2066-07
## ENSG00000000003.15 2096 4242
## ENSG00000000005.6 0 16
## ENSG00000000419.13 3780 1827
## ENSG00000000457.14 698 661
## ENSG00000000460.17 594 405
## ENSG00000000938.13 254 2584
## TCGA-64-5781-01A-01R-1628-07 TCGA-05-4422-01A-01R-1206-07
## ENSG00000000003.15 1826 4911
## ENSG00000000005.6 1 1
## ENSG00000000419.13 1309 1213
## ENSG00000000457.14 521 2786
## ENSG00000000460.17 238 753
## ENSG00000000938.13 245 858
## TCGA-44-2656-01A-02R-0946-07 TCGA-78-7535-01A-11R-2066-07
## ENSG00000000003.15 6039 4813
## ENSG00000000005.6 1 0
## ENSG00000000419.13 2198 2160
## ENSG00000000457.14 1767 785
## ENSG00000000460.17 689 305
## ENSG00000000938.13 2550 3024
## TCGA-86-8358-01A-11R-2326-07 TCGA-62-8402-01A-11R-2326-07
## ENSG00000000003.15 669 6379
## ENSG00000000005.6 0 0
## ENSG00000000419.13 889 2002
## ENSG00000000457.14 442 658
## ENSG00000000460.17 662 345
## ENSG00000000938.13 247 1287
## TCGA-MP-A4T7-01A-11R-A24X-07 TCGA-50-8460-01A-11R-2326-07
## ENSG00000000003.15 1456 2120
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1851 1161
## ENSG00000000457.14 650 432
## ENSG00000000460.17 378 126
## ENSG00000000938.13 1686 981
## TCGA-69-8253-01A-11R-2287-07 TCGA-49-6761-01A-31R-1949-07
## ENSG00000000003.15 2492 3950
## ENSG00000000005.6 2 10
## ENSG00000000419.13 3880 608
## ENSG00000000457.14 1100 375
## ENSG00000000460.17 204 210
## ENSG00000000938.13 1036 595
## TCGA-L9-A444-01A-21R-A24H-07 TCGA-49-6761-11A-01R-1949-07
## ENSG00000000003.15 1040 696
## ENSG00000000005.6 1 0
## ENSG00000000419.13 859 558
## ENSG00000000457.14 597 290
## ENSG00000000460.17 151 54
## ENSG00000000938.13 901 2471
## TCGA-55-7227-01A-11R-2039-07 TCGA-44-7669-01A-21R-2066-07
## ENSG00000000003.15 2235 3352
## ENSG00000000005.6 18 0
## ENSG00000000419.13 918 2922
## ENSG00000000457.14 488 1147
## ENSG00000000460.17 187 1262
## ENSG00000000938.13 1107 1333
## TCGA-75-7031-01A-11R-1949-07 TCGA-78-7143-01A-11R-2039-07
## ENSG00000000003.15 1703 4537
## ENSG00000000005.6 0 3
## ENSG00000000419.13 702 1202
## ENSG00000000457.14 457 1099
## ENSG00000000460.17 178 568
## ENSG00000000938.13 494 972
## TCGA-55-8092-01A-11R-2241-07 TCGA-49-4507-01A-01R-1206-07
## ENSG00000000003.15 4028 2096
## ENSG00000000005.6 1 0
## ENSG00000000419.13 3934 1665
## ENSG00000000457.14 1783 403
## ENSG00000000460.17 881 270
## ENSG00000000938.13 702 565
## TCGA-73-4666-01A-01R-1206-07 TCGA-91-8499-01A-11R-2403-07
## ENSG00000000003.15 4382 13000
## ENSG00000000005.6 7 1
## ENSG00000000419.13 2336 2073
## ENSG00000000457.14 1320 426
## ENSG00000000460.17 1540 624
## ENSG00000000938.13 1939 654
## TCGA-86-8359-01A-11R-2326-07 TCGA-50-6595-01A-12R-1858-07
## ENSG00000000003.15 2085 1329
## ENSG00000000005.6 2 0
## ENSG00000000419.13 1867 1395
## ENSG00000000457.14 605 297
## ENSG00000000460.17 227 320
## ENSG00000000938.13 659 387
## TCGA-55-6968-11A-01R-1949-07 TCGA-44-7667-01A-31R-2066-07
## ENSG00000000003.15 548 2366
## ENSG00000000005.6 1 1
## ENSG00000000419.13 709 2760
## ENSG00000000457.14 399 807
## ENSG00000000460.17 92 1011
## ENSG00000000938.13 3718 491
## TCGA-99-8033-01A-11R-2241-07 TCGA-62-8397-01A-11R-2326-07
## ENSG00000000003.15 2519 1233
## ENSG00000000005.6 0 0
## ENSG00000000419.13 3146 411
## ENSG00000000457.14 1741 272
## ENSG00000000460.17 1144 34
## ENSG00000000938.13 1340 285
## TCGA-MP-A4TA-01A-21R-A24X-07 TCGA-55-8510-01A-11R-2403-07
## ENSG00000000003.15 2165 2611
## ENSG00000000005.6 1 0
## ENSG00000000419.13 2351 828
## ENSG00000000457.14 776 503
## ENSG00000000460.17 960 332
## ENSG00000000938.13 744 2151
## TCGA-50-6591-01A-11R-1755-07 TCGA-05-5425-01A-02R-1628-07
## ENSG00000000003.15 2307 2393
## ENSG00000000005.6 1295 0
## ENSG00000000419.13 2030 2238
## ENSG00000000457.14 631 640
## ENSG00000000460.17 823 468
## ENSG00000000938.13 115 1232
## TCGA-49-AARR-01A-11R-A41B-07 TCGA-55-7574-01A-11R-2039-07
## ENSG00000000003.15 3859 996
## ENSG00000000005.6 2 0
## ENSG00000000419.13 1149 737
## ENSG00000000457.14 600 478
## ENSG00000000460.17 123 275
## ENSG00000000938.13 1524 827
## TCGA-44-7662-01A-11R-2066-07 TCGA-69-7979-01A-11R-2187-07
## ENSG00000000003.15 3513 1683
## ENSG00000000005.6 0 0
## ENSG00000000419.13 3093 2845
## ENSG00000000457.14 1291 957
## ENSG00000000460.17 1794 1233
## ENSG00000000938.13 1332 998
## TCGA-55-8614-01A-11R-2403-07 TCGA-69-7980-01A-11R-2187-07
## ENSG00000000003.15 3630 4915
## ENSG00000000005.6 2 0
## ENSG00000000419.13 1122 2023
## ENSG00000000457.14 399 1156
## ENSG00000000460.17 289 670
## ENSG00000000938.13 338 1463
## TCGA-55-A494-01A-11R-A24X-07 TCGA-73-4675-01A-01R-1206-07
## ENSG00000000003.15 1305 5937
## ENSG00000000005.6 0 0
## ENSG00000000419.13 732 1435
## ENSG00000000457.14 929 1093
## ENSG00000000460.17 344 254
## ENSG00000000938.13 160 1487
## TCGA-05-4390-01A-02R-1755-07 TCGA-97-7941-01A-11R-2187-07
## ENSG00000000003.15 2311 11528
## ENSG00000000005.6 0 13
## ENSG00000000419.13 1331 1547
## ENSG00000000457.14 385 1257
## ENSG00000000460.17 499 232
## ENSG00000000938.13 540 1465
## TCGA-05-4433-01A-22R-1858-07 TCGA-69-7765-01A-11R-2170-07
## ENSG00000000003.15 870 2063
## ENSG00000000005.6 0 1
## ENSG00000000419.13 734 1209
## ENSG00000000457.14 672 717
## ENSG00000000460.17 184 251
## ENSG00000000938.13 1939 609
## TCGA-86-8672-01A-21R-2403-07 TCGA-49-4506-01A-01R-1206-07
## ENSG00000000003.15 1329 1998
## ENSG00000000005.6 1 3
## ENSG00000000419.13 1259 1969
## ENSG00000000457.14 274 983
## ENSG00000000460.17 190 665
## ENSG00000000938.13 917 576
## TCGA-MP-A4TE-01A-22R-A466-07 TCGA-55-8508-01A-11R-2403-07
## ENSG00000000003.15 4548 1431
## ENSG00000000005.6 4 0
## ENSG00000000419.13 1531 764
## ENSG00000000457.14 704 513
## ENSG00000000460.17 295 258
## ENSG00000000938.13 376 693
## TCGA-44-2668-01B-02R-A277-07 TCGA-75-6206-01A-11R-1755-07
## ENSG00000000003.15 613 1198
## ENSG00000000005.6 0 0
## ENSG00000000419.13 789 815
## ENSG00000000457.14 889 680
## ENSG00000000460.17 577 146
## ENSG00000000938.13 1914 1094
## TCGA-50-5946-01A-11R-1755-07 TCGA-97-A4M1-01A-11R-A24X-07
## ENSG00000000003.15 3601 2633
## ENSG00000000005.6 2 5
## ENSG00000000419.13 2721 1583
## ENSG00000000457.14 1287 908
## ENSG00000000460.17 1075 246
## ENSG00000000938.13 281 1402
## TCGA-55-6972-01A-11R-1949-07 TCGA-64-1681-01A-11R-2066-07
## ENSG00000000003.15 1575 8534
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1819 2291
## ENSG00000000457.14 917 886
## ENSG00000000460.17 173 419
## ENSG00000000938.13 38 1134
## TCGA-97-A4M7-01A-11R-A24X-07 TCGA-44-2662-01A-01R-A278-07
## ENSG00000000003.15 3582 3666
## ENSG00000000005.6 1 5
## ENSG00000000419.13 1222 1861
## ENSG00000000457.14 789 647
## ENSG00000000460.17 344 959
## ENSG00000000938.13 1746 1275
## TCGA-38-4628-01A-01R-1206-07 TCGA-55-7283-01A-11R-2039-07
## ENSG00000000003.15 3942 2504
## ENSG00000000005.6 1 3
## ENSG00000000419.13 3034 1060
## ENSG00000000457.14 1280 631
## ENSG00000000460.17 913 316
## ENSG00000000938.13 602 657
## TCGA-44-2662-01A-01R-0946-07 TCGA-49-4488-01A-01R-1755-07
## ENSG00000000003.15 5758 1848
## ENSG00000000005.6 0 0
## ENSG00000000419.13 2608 1264
## ENSG00000000457.14 666 590
## ENSG00000000460.17 1221 172
## ENSG00000000938.13 3435 579
## TCGA-50-8457-01A-11R-2326-07 TCGA-86-6562-01A-11R-1755-07
## ENSG00000000003.15 1815 3136
## ENSG00000000005.6 4 0
## ENSG00000000419.13 999 1460
## ENSG00000000457.14 918 950
## ENSG00000000460.17 263 531
## ENSG00000000938.13 1054 586
## TCGA-75-7030-01A-11R-1949-07 TCGA-55-7815-01A-11R-2170-07
## ENSG00000000003.15 1523 2508
## ENSG00000000005.6 1 0
## ENSG00000000419.13 388 775
## ENSG00000000457.14 329 466
## ENSG00000000460.17 69 172
## ENSG00000000938.13 432 303
## TCGA-J2-A4AG-01A-11R-A24H-07 TCGA-44-2666-01B-02R-A277-07
## ENSG00000000003.15 2607 801
## ENSG00000000005.6 148 16
## ENSG00000000419.13 1051 469
## ENSG00000000457.14 565 877
## ENSG00000000460.17 284 377
## ENSG00000000938.13 874 86
## TCGA-78-7542-01A-21R-2066-07 TCGA-62-A472-01A-11R-A24H-07
## ENSG00000000003.15 2326 908
## ENSG00000000005.6 0 0
## ENSG00000000419.13 3111 722
## ENSG00000000457.14 969 412
## ENSG00000000460.17 1208 113
## ENSG00000000938.13 382 398
## TCGA-69-8254-01A-11R-2287-07 TCGA-55-6712-01A-11R-1858-07
## ENSG00000000003.15 10178 1849
## ENSG00000000005.6 10 1
## ENSG00000000419.13 2362 812
## ENSG00000000457.14 3230 406
## ENSG00000000460.17 505 186
## ENSG00000000938.13 1773 535
## TCGA-44-2657-11A-01R-1758-07 TCGA-44-2657-01A-01R-1107-07
## ENSG00000000003.15 881 2650
## ENSG00000000005.6 1 5
## ENSG00000000419.13 941 1322
## ENSG00000000457.14 536 661
## ENSG00000000460.17 144 249
## ENSG00000000938.13 2954 1125
## TCGA-MP-A4TI-01A-21R-A24X-07 TCGA-55-8505-01A-11R-2403-07
## ENSG00000000003.15 1717 1501
## ENSG00000000005.6 1 0
## ENSG00000000419.13 755 1160
## ENSG00000000457.14 608 473
## ENSG00000000460.17 456 282
## ENSG00000000938.13 3008 346
## TCGA-49-4510-01A-01R-1206-07 TCGA-44-2665-11A-01R-1758-07
## ENSG00000000003.15 6084 1745
## ENSG00000000005.6 0 2
## ENSG00000000419.13 2553 1381
## ENSG00000000457.14 856 574
## ENSG00000000460.17 300 162
## ENSG00000000938.13 1005 3844
## TCGA-64-1676-01A-01R-0946-07 TCGA-44-2665-01A-01R-0946-07
## ENSG00000000003.15 5889 4236
## ENSG00000000005.6 0 1321
## ENSG00000000419.13 2845 1799
## ENSG00000000457.14 476 1301
## ENSG00000000460.17 252 376
## ENSG00000000938.13 742 823
## TCGA-97-7937-01A-11R-2170-07 TCGA-05-4418-01A-01R-1206-07
## ENSG00000000003.15 4337 7666
## ENSG00000000005.6 0 1
## ENSG00000000419.13 3201 1574
## ENSG00000000457.14 1367 948
## ENSG00000000460.17 625 424
## ENSG00000000938.13 307 1210
## TCGA-86-8076-01A-31R-2241-07 TCGA-55-8091-01A-11R-2241-07
## ENSG00000000003.15 3269 3632
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1540 1796
## ENSG00000000457.14 1171 774
## ENSG00000000460.17 318 228
## ENSG00000000938.13 1915 1171
## TCGA-38-4631-01A-01R-1755-07 TCGA-44-2655-11A-01R-1758-07
## ENSG00000000003.15 2567 2497
## ENSG00000000005.6 1 7
## ENSG00000000419.13 1771 1525
## ENSG00000000457.14 354 803
## ENSG00000000460.17 488 165
## ENSG00000000938.13 343 3215
## TCGA-44-2659-01A-01R-0946-07 TCGA-55-7725-01A-11R-2170-07
## ENSG00000000003.15 1845 942
## ENSG00000000005.6 9 2
## ENSG00000000419.13 1669 878
## ENSG00000000457.14 1203 635
## ENSG00000000460.17 293 150
## ENSG00000000938.13 1004 589
## TCGA-38-4629-01A-02R-1206-07 TCGA-05-4382-01A-01R-1206-07
## ENSG00000000003.15 7434 3887
## ENSG00000000005.6 0 0
## ENSG00000000419.13 2968 2823
## ENSG00000000457.14 555 1024
## ENSG00000000460.17 643 724
## ENSG00000000938.13 2145 4651
## TCGA-73-4676-11A-01R-1755-07 TCGA-44-6146-01B-04R-A277-07
## ENSG00000000003.15 823 721
## ENSG00000000005.6 1 14
## ENSG00000000419.13 923 650
## ENSG00000000457.14 230 773
## ENSG00000000460.17 76 343
## ENSG00000000938.13 3373 81
## TCGA-38-6178-01A-11R-1755-07 TCGA-55-7573-01A-11R-2039-07
## ENSG00000000003.15 2182 3083
## ENSG00000000005.6 0 1
## ENSG00000000419.13 737 808
## ENSG00000000457.14 376 767
## ENSG00000000460.17 142 276
## ENSG00000000938.13 387 532
## TCGA-50-5935-01A-11R-1755-07 TCGA-55-8207-01A-11R-2241-07
## ENSG00000000003.15 2190 4227
## ENSG00000000005.6 2 1
## ENSG00000000419.13 1125 2297
## ENSG00000000457.14 1122 1052
## ENSG00000000460.17 357 416
## ENSG00000000938.13 560 2089
## TCGA-50-5935-11A-01R-1858-07 TCGA-50-5051-01A-21R-1858-07
## ENSG00000000003.15 1189 2627
## ENSG00000000005.6 7 1
## ENSG00000000419.13 709 602
## ENSG00000000457.14 361 452
## ENSG00000000460.17 54 171
## ENSG00000000938.13 1815 283
## TCGA-44-3917-01B-02R-A277-07 TCGA-78-7540-01A-11R-2066-07
## ENSG00000000003.15 854 1919
## ENSG00000000005.6 9 0
## ENSG00000000419.13 689 2082
## ENSG00000000457.14 789 899
## ENSG00000000460.17 777 262
## ENSG00000000938.13 134 2432
## TCGA-NJ-A7XG-01A-12R-A39D-07 TCGA-55-7576-01A-11R-2066-07
## ENSG00000000003.15 5137 4196
## ENSG00000000005.6 0 1
## ENSG00000000419.13 1429 2406
## ENSG00000000457.14 831 2073
## ENSG00000000460.17 244 1106
## ENSG00000000938.13 156 1454
## TCGA-64-5775-01A-01R-1628-07 TCGA-55-8203-01A-11R-2241-07
## ENSG00000000003.15 3577 4144
## ENSG00000000005.6 13 0
## ENSG00000000419.13 1206 2439
## ENSG00000000457.14 263 1644
## ENSG00000000460.17 172 670
## ENSG00000000938.13 758 1364
## TCGA-55-6986-01A-11R-1949-07 TCGA-86-6851-01A-11R-1949-07
## ENSG00000000003.15 3399 2918
## ENSG00000000005.6 6 0
## ENSG00000000419.13 967 2183
## ENSG00000000457.14 407 1616
## ENSG00000000460.17 107 781
## ENSG00000000938.13 423 2165
## TCGA-49-4505-01A-01R-1206-07 TCGA-73-4662-01A-01R-1206-07
## ENSG00000000003.15 7867 6839
## ENSG00000000005.6 44 150
## ENSG00000000419.13 3030 2512
## ENSG00000000457.14 1011 2889
## ENSG00000000460.17 203 1865
## ENSG00000000938.13 1878 2257
## TCGA-55-6981-11A-01R-1949-07 TCGA-38-4625-01A-01R-1206-07
## ENSG00000000003.15 835 10120
## ENSG00000000005.6 0 0
## ENSG00000000419.13 572 6714
## ENSG00000000457.14 274 1006
## ENSG00000000460.17 93 2088
## ENSG00000000938.13 1564 1873
## TCGA-55-7284-01B-11R-2241-07 TCGA-38-4625-11A-01R-1758-07
## ENSG00000000003.15 2720 1990
## ENSG00000000005.6 1 9
## ENSG00000000419.13 1963 1984
## ENSG00000000457.14 1690 608
## ENSG00000000460.17 307 161
## ENSG00000000938.13 4064 12548
## TCGA-55-8089-01A-11R-2241-07 TCGA-38-4626-01A-01R-1206-07
## ENSG00000000003.15 3540 2330
## ENSG00000000005.6 1 0
## ENSG00000000419.13 2662 2728
## ENSG00000000457.14 1091 965
## ENSG00000000460.17 819 318
## ENSG00000000938.13 3082 10286
## TCGA-44-5644-01A-21R-2039-07 TCGA-44-6144-11A-01R-1755-07
## ENSG00000000003.15 1221 651
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1129 927
## ENSG00000000457.14 545 462
## ENSG00000000460.17 318 94
## ENSG00000000938.13 196 4866
## TCGA-91-6835-01A-11R-1858-07 TCGA-L4-A4E6-01A-11R-A24H-07
## ENSG00000000003.15 3528 785
## ENSG00000000005.6 0 3
## ENSG00000000419.13 1092 969
## ENSG00000000457.14 1081 574
## ENSG00000000460.17 563 136
## ENSG00000000938.13 1724 6791
## TCGA-05-4398-01A-01R-1206-07 TCGA-97-A4M6-01A-11R-A24X-07
## ENSG00000000003.15 6052 7372
## ENSG00000000005.6 3 4
## ENSG00000000419.13 3726 1315
## ENSG00000000457.14 1336 1351
## ENSG00000000460.17 1243 332
## ENSG00000000938.13 3469 1245
## TCGA-49-AAR9-01A-21R-A41B-07 TCGA-75-7027-01A-11R-1949-07
## ENSG00000000003.15 2860 6732
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1911 2738
## ENSG00000000457.14 815 1133
## ENSG00000000460.17 759 1258
## ENSG00000000938.13 106 592
## TCGA-49-AARO-01A-12R-A41B-07 TCGA-69-8255-01A-11R-2287-07
## ENSG00000000003.15 3647 3857
## ENSG00000000005.6 1 0
## ENSG00000000419.13 1123 4734
## ENSG00000000457.14 569 1142
## ENSG00000000460.17 255 689
## ENSG00000000938.13 1827 1489
## TCGA-55-7816-01A-11R-2170-07 TCGA-86-A4D0-01A-11R-A24H-07
## ENSG00000000003.15 1121 3091
## ENSG00000000005.6 25 0
## ENSG00000000419.13 1434 1757
## ENSG00000000457.14 809 740
## ENSG00000000460.17 236 813
## ENSG00000000938.13 4004 309
## TCGA-NJ-A4YQ-01A-11R-A262-07 TCGA-78-8648-01A-11R-2403-07
## ENSG00000000003.15 1569 746
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1070 691
## ENSG00000000457.14 929 300
## ENSG00000000460.17 310 142
## ENSG00000000938.13 1076 2646
## TCGA-L9-A7SV-01A-11R-A39D-07 TCGA-49-AAR3-01A-11R-A41B-07
## ENSG00000000003.15 1694 1073
## ENSG00000000005.6 2 1
## ENSG00000000419.13 2119 892
## ENSG00000000457.14 761 351
## ENSG00000000460.17 385 307
## ENSG00000000938.13 451 809
## TCGA-44-6778-11A-01R-1858-07 TCGA-44-6145-11A-01R-1858-07
## ENSG00000000003.15 1028 776
## ENSG00000000005.6 3 2
## ENSG00000000419.13 1206 992
## ENSG00000000457.14 486 307
## ENSG00000000460.17 105 67
## ENSG00000000938.13 4093 3583
## TCGA-91-6830-01A-11R-1949-07 TCGA-MP-A4T6-01A-32R-A262-07
## ENSG00000000003.15 2295 4736
## ENSG00000000005.6 1 2
## ENSG00000000419.13 1113 1460
## ENSG00000000457.14 430 1936
## ENSG00000000460.17 222 486
## ENSG00000000938.13 654 1481
## TCGA-91-8496-01A-11R-2403-07 TCGA-55-8085-01A-11R-2241-07
## ENSG00000000003.15 4435 3893
## ENSG00000000005.6 1 0
## ENSG00000000419.13 1059 1818
## ENSG00000000457.14 1243 1487
## ENSG00000000460.17 209 919
## ENSG00000000938.13 1434 1416
## TCGA-55-7281-01A-11R-2039-07 TCGA-05-5429-01A-01R-1628-07
## ENSG00000000003.15 3595 2184
## ENSG00000000005.6 1 0
## ENSG00000000419.13 1163 1549
## ENSG00000000457.14 329 855
## ENSG00000000460.17 186 298
## ENSG00000000938.13 1872 102
## TCGA-44-3919-01A-02R-1107-07 TCGA-75-7025-01A-12R-1949-07
## ENSG00000000003.15 6709 9390
## ENSG00000000005.6 6 5
## ENSG00000000419.13 1668 1698
## ENSG00000000457.14 997 1277
## ENSG00000000460.17 541 294
## ENSG00000000938.13 1609 1068
## TCGA-55-8514-01A-11R-2403-07 TCGA-78-7537-01A-11R-2066-07
## ENSG00000000003.15 4916 3190
## ENSG00000000005.6 2 2
## ENSG00000000419.13 873 1083
## ENSG00000000457.14 699 1131
## ENSG00000000460.17 162 226
## ENSG00000000938.13 636 535
## TCGA-69-A59K-01A-11R-A262-07 TCGA-95-8494-01A-11R-2326-07
## ENSG00000000003.15 1338 1260
## ENSG00000000005.6 0 2
## ENSG00000000419.13 2217 1139
## ENSG00000000457.14 1101 334
## ENSG00000000460.17 620 302
## ENSG00000000938.13 1683 1037
## TCGA-MN-A4N1-01A-11R-A24X-07 TCGA-MP-A4SV-01A-11R-A24X-07
## ENSG00000000003.15 3495 2405
## ENSG00000000005.6 3 48
## ENSG00000000419.13 1625 1770
## ENSG00000000457.14 788 1076
## ENSG00000000460.17 440 672
## ENSG00000000938.13 480 1053
## TCGA-50-5942-01A-21R-1755-07 TCGA-55-7903-01A-11R-2170-07
## ENSG00000000003.15 2021 2905
## ENSG00000000005.6 0 125
## ENSG00000000419.13 833 1930
## ENSG00000000457.14 900 1103
## ENSG00000000460.17 133 574
## ENSG00000000938.13 237 701
## TCGA-55-8208-01A-11R-2241-07 TCGA-05-4395-01A-01R-1206-07
## ENSG00000000003.15 2319 3131
## ENSG00000000005.6 0 0
## ENSG00000000419.13 2237 3193
## ENSG00000000457.14 805 1326
## ENSG00000000460.17 762 483
## ENSG00000000938.13 3807 580
## TCGA-49-AAR4-01A-12R-A41B-07 TCGA-75-5122-01A-01R-1755-07
## ENSG00000000003.15 1887 1806
## ENSG00000000005.6 1 1
## ENSG00000000419.13 855 1066
## ENSG00000000457.14 517 355
## ENSG00000000460.17 320 260
## ENSG00000000938.13 1318 2381
## TCGA-73-7499-01A-11R-2187-07 TCGA-NJ-A55R-01A-11R-A262-07
## ENSG00000000003.15 6961 3444
## ENSG00000000005.6 0 3
## ENSG00000000419.13 3794 2569
## ENSG00000000457.14 1337 1977
## ENSG00000000460.17 1038 545
## ENSG00000000938.13 1538 678
## TCGA-NJ-A4YG-01A-22R-A262-07 TCGA-53-A4EZ-01A-12R-A24X-07
## ENSG00000000003.15 1678 4627
## ENSG00000000005.6 7 0
## ENSG00000000419.13 987 1318
## ENSG00000000457.14 755 691
## ENSG00000000460.17 219 354
## ENSG00000000938.13 1202 785
## TCGA-78-7156-01A-11R-2039-07 TCGA-55-6970-01A-11R-1949-07
## ENSG00000000003.15 2296 4026
## ENSG00000000005.6 2 0
## ENSG00000000419.13 951 2021
## ENSG00000000457.14 1196 1398
## ENSG00000000460.17 141 863
## ENSG00000000938.13 230 1090
## TCGA-NJ-A55O-01A-11R-A262-07 TCGA-99-8025-01A-11R-2241-07
## ENSG00000000003.15 858 3868
## ENSG00000000005.6 0 3
## ENSG00000000419.13 1048 1718
## ENSG00000000457.14 884 1126
## ENSG00000000460.17 234 450
## ENSG00000000938.13 1767 604
## TCGA-50-6594-01A-11R-1755-07 TCGA-99-7458-01A-11R-2039-07
## ENSG00000000003.15 3825 3352
## ENSG00000000005.6 0 11
## ENSG00000000419.13 1177 1447
## ENSG00000000457.14 400 1294
## ENSG00000000460.17 449 310
## ENSG00000000938.13 367 1325
## TCGA-91-6828-01A-11R-1858-07 TCGA-MP-A4TJ-01A-51R-A262-07
## ENSG00000000003.15 1472 1460
## ENSG00000000005.6 0 1
## ENSG00000000419.13 545 929
## ENSG00000000457.14 558 455
## ENSG00000000460.17 168 226
## ENSG00000000938.13 702 1326
## TCGA-91-6828-11A-01R-1858-07 TCGA-99-8028-01A-11R-2241-07
## ENSG00000000003.15 1453 2134
## ENSG00000000005.6 2 3
## ENSG00000000419.13 937 1890
## ENSG00000000457.14 354 511
## ENSG00000000460.17 91 163
## ENSG00000000938.13 2558 2963
## TCGA-91-6849-11A-01R-1949-07 TCGA-64-5778-01A-01R-1628-07
## ENSG00000000003.15 1304 5440
## ENSG00000000005.6 1 0
## ENSG00000000419.13 791 2397
## ENSG00000000457.14 375 756
## ENSG00000000460.17 57 530
## ENSG00000000938.13 1842 870
## TCGA-91-6847-01A-11R-1949-07 TCGA-91-6847-11A-01R-1949-07
## ENSG00000000003.15 12232 1275
## ENSG00000000005.6 0 0
## ENSG00000000419.13 4063 690
## ENSG00000000457.14 1599 440
## ENSG00000000460.17 1161 57
## ENSG00000000938.13 139 1439
## TCGA-49-AARQ-01A-11R-A41B-07 TCGA-55-8205-01A-11R-2241-07
## ENSG00000000003.15 2405 2157
## ENSG00000000005.6 1 0
## ENSG00000000419.13 1434 2330
## ENSG00000000457.14 590 631
## ENSG00000000460.17 335 567
## ENSG00000000938.13 816 3184
## TCGA-49-AARN-01A-21R-A41B-07 TCGA-86-A4P7-01A-11R-A24X-07
## ENSG00000000003.15 1779 1179
## ENSG00000000005.6 0 0
## ENSG00000000419.13 911 1243
## ENSG00000000457.14 565 661
## ENSG00000000460.17 204 229
## ENSG00000000938.13 263 1866
## TCGA-MP-A4TC-01A-11R-A24X-07 TCGA-55-6975-11A-01R-1949-07
## ENSG00000000003.15 2477 510
## ENSG00000000005.6 0 2
## ENSG00000000419.13 1325 660
## ENSG00000000457.14 587 346
## ENSG00000000460.17 363 65
## ENSG00000000938.13 942 1310
## TCGA-50-8459-01A-11R-2326-07 TCGA-55-6975-01A-11R-1949-07
## ENSG00000000003.15 1799 1577
## ENSG00000000005.6 1 0
## ENSG00000000419.13 926 829
## ENSG00000000457.14 421 352
## ENSG00000000460.17 96 307
## ENSG00000000938.13 2660 113
## TCGA-95-7039-01A-11R-1949-07 TCGA-44-7659-01A-11R-2066-07
## ENSG00000000003.15 1622 945
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1300 901
## ENSG00000000457.14 341 709
## ENSG00000000460.17 235 127
## ENSG00000000938.13 281 489
## TCGA-86-8673-01A-11R-2403-07 TCGA-49-4487-01A-21R-1858-07
## ENSG00000000003.15 1759 1703
## ENSG00000000005.6 0 0
## ENSG00000000419.13 963 1635
## ENSG00000000457.14 227 380
## ENSG00000000460.17 229 479
## ENSG00000000938.13 532 906
## TCGA-78-7153-01A-11R-2039-07 TCGA-44-A4SU-01A-11R-A24X-07
## ENSG00000000003.15 5479 8804
## ENSG00000000005.6 3 2
## ENSG00000000419.13 1015 1445
## ENSG00000000457.14 549 1075
## ENSG00000000460.17 204 327
## ENSG00000000938.13 196 427
## TCGA-44-2668-01A-01R-0946-07 TCGA-49-4494-01A-01R-1206-07
## ENSG00000000003.15 3345 9092
## ENSG00000000005.6 1 4
## ENSG00000000419.13 3374 2861
## ENSG00000000457.14 1037 743
## ENSG00000000460.17 806 283
## ENSG00000000938.13 3583 1023
## TCGA-78-7154-01A-11R-2039-07 TCGA-49-AARE-01A-11R-A41B-07
## ENSG00000000003.15 4092 5209
## ENSG00000000005.6 0 2
## ENSG00000000419.13 3568 1229
## ENSG00000000457.14 622 666
## ENSG00000000460.17 508 358
## ENSG00000000938.13 302 909
## TCGA-95-A4VK-01A-11R-A262-07 TCGA-99-AA5R-01A-11R-A39D-07
## ENSG00000000003.15 2261 1428
## ENSG00000000005.6 1 1
## ENSG00000000419.13 815 1011
## ENSG00000000457.14 1190 695
## ENSG00000000460.17 285 151
## ENSG00000000938.13 599 3410
## TCGA-55-6969-11A-01R-1949-07 TCGA-97-7938-01A-11R-2170-07
## ENSG00000000003.15 513 2015
## ENSG00000000005.6 1 15
## ENSG00000000419.13 754 1215
## ENSG00000000457.14 350 693
## ENSG00000000460.17 73 212
## ENSG00000000938.13 2077 417
## TCGA-J2-A4AE-01A-21R-A24H-07 TCGA-93-7347-01A-11R-2187-07
## ENSG00000000003.15 2547 1381
## ENSG00000000005.6 0 5
## ENSG00000000419.13 1365 998
## ENSG00000000457.14 612 715
## ENSG00000000460.17 184 195
## ENSG00000000938.13 1579 1337
## TCGA-62-A470-01A-11R-A24H-07 TCGA-50-5936-11A-01R-1628-07
## ENSG00000000003.15 775 721
## ENSG00000000005.6 0 0
## ENSG00000000419.13 583 767
## ENSG00000000457.14 355 465
## ENSG00000000460.17 141 75
## ENSG00000000938.13 404 3573
## TCGA-78-7148-01A-11R-2039-07 TCGA-35-3615-01A-01R-0946-07
## ENSG00000000003.15 3257 4238
## ENSG00000000005.6 0 0
## ENSG00000000419.13 1092 2382
## ENSG00000000457.14 688 1372
## ENSG00000000460.17 262 363
## ENSG00000000938.13 362 684
## TCGA-MP-A4TK-01A-11R-A24X-07 TCGA-97-8175-01A-11R-2287-07
## ENSG00000000003.15 2170 6145
## ENSG00000000005.6 21 2
## ENSG00000000419.13 1380 2342
## ENSG00000000457.14 655 761
## ENSG00000000460.17 367 451
## ENSG00000000938.13 1714 1512
## TCGA-62-A46V-01A-11R-A24H-07 TCGA-05-4250-01A-01R-1107-07
## ENSG00000000003.15 2604 5316
## ENSG00000000005.6 1 5
## ENSG00000000419.13 1848 2886
## ENSG00000000457.14 914 631
## ENSG00000000460.17 511 716
## ENSG00000000938.13 421 1468
## TCGA-44-2665-01A-01R-A278-07 TCGA-44-2665-01B-06R-A277-07
## ENSG00000000003.15 4000 401
## ENSG00000000005.6 1254 73
## ENSG00000000419.13 2216 154
## ENSG00000000457.14 1726 221
## ENSG00000000460.17 494 102
## ENSG00000000938.13 301 79
## TCGA-50-5044-01A-21R-1858-07 TCGA-64-1678-01A-01R-0946-07
## ENSG00000000003.15 936 3861
## ENSG00000000005.6 0 0
## ENSG00000000419.13 705 3149
## ENSG00000000457.14 175 419
## ENSG00000000460.17 164 502
## ENSG00000000938.13 332 69
## TCGA-78-7155-01A-11R-2039-07 TCGA-78-7220-01A-11R-2039-07
## ENSG00000000003.15 3875 2906
## ENSG00000000005.6 8 0
## ENSG00000000419.13 1171 2724
## ENSG00000000457.14 421 909
## ENSG00000000460.17 814 760
## ENSG00000000938.13 162 286
## TCGA-80-5611-01A-01R-1628-07 TCGA-93-8067-01A-11R-2287-07
## ENSG00000000003.15 3254 3434
## ENSG00000000005.6 1 0
## ENSG00000000419.13 1781 2676
## ENSG00000000457.14 725 1269
## ENSG00000000460.17 556 765
## ENSG00000000938.13 1467 801
# Pull the per-gene annotation table (rowData of `data`) into a plain data frame
gene_info <- data %>%
  rowData() %>%
  as.data.frame()
head(gene_info) # Show the first few genes with their annotation columns
## source type score phase gene_id gene_type
## ENSG00000000003.15 HAVANA gene NA NA ENSG00000000003.15 protein_coding
## ENSG00000000005.6 HAVANA gene NA NA ENSG00000000005.6 protein_coding
## ENSG00000000419.13 HAVANA gene NA NA ENSG00000000419.13 protein_coding
## ENSG00000000457.14 HAVANA gene NA NA ENSG00000000457.14 protein_coding
## ENSG00000000460.17 HAVANA gene NA NA ENSG00000000460.17 protein_coding
## ENSG00000000938.13 HAVANA gene NA NA ENSG00000000938.13 protein_coding
## gene_name level hgnc_id havana_gene
## ENSG00000000003.15 TSPAN6 2 HGNC:11858 OTTHUMG00000022002.2
## ENSG00000000005.6 TNMD 2 HGNC:17757 OTTHUMG00000022001.2
## ENSG00000000419.13 DPM1 2 HGNC:3005 OTTHUMG00000032742.2
## ENSG00000000457.14 SCYL3 2 HGNC:19285 OTTHUMG00000035941.6
## ENSG00000000460.17 C1orf112 2 HGNC:25565 OTTHUMG00000035821.9
## ENSG00000000938.13 FGR 2 HGNC:3697 OTTHUMG00000003516.3
# Pull the per-sample metadata table (colData of `data`) into a plain data frame
sample_info <- data %>%
  colData() %>%
  as.data.frame()
head(sample_info) # Show the metadata of the first few samples
## barcode patient
## TCGA-73-4658-01A-01R-1755-07 TCGA-73-4658-01A-01R-1755-07 TCGA-73-4658
## TCGA-44-2661-11A-01R-1758-07 TCGA-44-2661-11A-01R-1758-07 TCGA-44-2661
## TCGA-55-6986-11A-01R-1949-07 TCGA-55-6986-11A-01R-1949-07 TCGA-55-6986
## TCGA-55-8615-01A-11R-2403-07 TCGA-55-8615-01A-11R-2403-07 TCGA-55-8615
## TCGA-97-8177-01A-11R-2287-07 TCGA-97-8177-01A-11R-2287-07 TCGA-97-8177
## TCGA-49-6744-11A-01R-1858-07 TCGA-49-6744-11A-01R-1858-07 TCGA-49-6744
## sample shortLetterCode
## TCGA-73-4658-01A-01R-1755-07 TCGA-73-4658-01A TP
## TCGA-44-2661-11A-01R-1758-07 TCGA-44-2661-11A NT
## TCGA-55-6986-11A-01R-1949-07 TCGA-55-6986-11A NT
## TCGA-55-8615-01A-11R-2403-07 TCGA-55-8615-01A TP
## TCGA-97-8177-01A-11R-2287-07 TCGA-97-8177-01A TP
## TCGA-49-6744-11A-01R-1858-07 TCGA-49-6744-11A NT
## definition sample_submitter_id
## TCGA-73-4658-01A-01R-1755-07 Primary solid Tumor TCGA-73-4658-01A
## TCGA-44-2661-11A-01R-1758-07 Solid Tissue Normal TCGA-44-2661-11A
## TCGA-55-6986-11A-01R-1949-07 Solid Tissue Normal TCGA-55-6986-11A
## TCGA-55-8615-01A-11R-2403-07 Primary solid Tumor TCGA-55-8615-01A
## TCGA-97-8177-01A-11R-2287-07 Primary solid Tumor TCGA-97-8177-01A
## TCGA-49-6744-11A-01R-1858-07 Solid Tissue Normal TCGA-49-6744-11A
## sample_type_id tumor_descriptor
## TCGA-73-4658-01A-01R-1755-07 01 Primary
## TCGA-44-2661-11A-01R-1758-07 11 Not Applicable
## TCGA-55-6986-11A-01R-1949-07 11 Not Applicable
## TCGA-55-8615-01A-11R-2403-07 01 Primary
## TCGA-97-8177-01A-11R-2287-07 01 Primary
## TCGA-49-6744-11A-01R-1858-07 11 Not Applicable
## sample_id submitter_id
## TCGA-73-4658-01A-01R-1755-07 bfde37f2-ab6f-4426-a33d-ef9d21772f02 TCGA-73-4658
## TCGA-44-2661-11A-01R-1758-07 2a7da235-f069-4efa-ac35-649520b4dbb3 TCGA-44-2661
## TCGA-55-6986-11A-01R-1949-07 e8e0e53a-4009-4141-ba6f-eed35fcaad7b TCGA-55-6986
## TCGA-55-8615-01A-11R-2403-07 7873c1a9-b1ea-46cc-b787-dfbad3366494 TCGA-55-8615
## TCGA-97-8177-01A-11R-2287-07 5711ce53-0bac-4a5d-b7ba-4e1bc0c56924 TCGA-97-8177
## TCGA-49-6744-11A-01R-1858-07 e3962e0e-eafa-4016-8fa4-1c4ba0d2a9ad TCGA-49-6744
## sample_type oct_embedded specimen_type
## TCGA-73-4658-01A-01R-1755-07 Primary Tumor <NA> Solid Tissue
## TCGA-44-2661-11A-01R-1758-07 Solid Tissue Normal <NA> Solid Tissue
## TCGA-55-6986-11A-01R-1949-07 Solid Tissue Normal <NA> Solid Tissue
## TCGA-55-8615-01A-11R-2403-07 Primary Tumor <NA> Solid Tissue
## TCGA-97-8177-01A-11R-2287-07 Primary Tumor <NA> Solid Tissue
## TCGA-49-6744-11A-01R-1858-07 Solid Tissue Normal <NA> Solid Tissue
## state is_ffpe tissue_type preservation_method
## TCGA-73-4658-01A-01R-1755-07 released FALSE Tumor Unknown
## TCGA-44-2661-11A-01R-1758-07 released FALSE Normal Unknown
## TCGA-55-6986-11A-01R-1949-07 released FALSE Normal Unknown
## TCGA-55-8615-01A-11R-2403-07 released FALSE Tumor Unknown
## TCGA-97-8177-01A-11R-2287-07 released FALSE Tumor Unknown
## TCGA-49-6744-11A-01R-1858-07 released FALSE Normal Unknown
## composition days_to_collection initial_weight
## TCGA-73-4658-01A-01R-1755-07 Not Reported NA NA
## TCGA-44-2661-11A-01R-1758-07 Not Reported NA NA
## TCGA-55-6986-11A-01R-1949-07 Not Reported NA NA
## TCGA-55-8615-01A-11R-2403-07 Not Reported NA NA
## TCGA-97-8177-01A-11R-2287-07 Not Reported NA NA
## TCGA-49-6744-11A-01R-1858-07 Not Reported NA NA
## intermediate_dimension
## TCGA-73-4658-01A-01R-1755-07 1.0
## TCGA-44-2661-11A-01R-1758-07 0.9
## TCGA-55-6986-11A-01R-1949-07 0.6
## TCGA-55-8615-01A-11R-2403-07 0.9
## TCGA-97-8177-01A-11R-2287-07 1.7
## TCGA-49-6744-11A-01R-1858-07 0.9
## pathology_report_uuid
## TCGA-73-4658-01A-01R-1755-07 37bb6a7b-4f9e-4690-b904-4eebc3189562
## TCGA-44-2661-11A-01R-1758-07 <NA>
## TCGA-55-6986-11A-01R-1949-07 <NA>
## TCGA-55-8615-01A-11R-2403-07 37b4238a-1aea-4670-8c43-b24525b94cdd
## TCGA-97-8177-01A-11R-2287-07 addbe4e2-114d-4e38-aa21-c3e9695da8fa
## TCGA-49-6744-11A-01R-1858-07 <NA>
## shortest_dimension longest_dimension
## TCGA-73-4658-01A-01R-1755-07 0.3 1.4
## TCGA-44-2661-11A-01R-1758-07 0.4 1.3
## TCGA-55-6986-11A-01R-1949-07 0.5 0.7
## TCGA-55-8615-01A-11R-2403-07 0.6 0.9
## TCGA-97-8177-01A-11R-2287-07 0.3 2.4
## TCGA-49-6744-11A-01R-1858-07 0.4 1.0
## synchronous_malignancy ajcc_pathologic_stage
## TCGA-73-4658-01A-01R-1755-07 Not Reported Stage IB
## TCGA-44-2661-11A-01R-1758-07 Not Reported Stage IA
## TCGA-55-6986-11A-01R-1949-07 No Stage IB
## TCGA-55-8615-01A-11R-2403-07 No Stage IIIA
## TCGA-97-8177-01A-11R-2287-07 No Stage IB
## TCGA-49-6744-11A-01R-1858-07 No Stage IIA
## days_to_diagnosis treatments
## TCGA-73-4658-01A-01R-1755-07 0 c(NA, NA....
## TCGA-44-2661-11A-01R-1758-07 0 c(NA, NA....
## TCGA-55-6986-11A-01R-1949-07 0 c(NA, NA....
## TCGA-55-8615-01A-11R-2403-07 0 c(NA, NA....
## TCGA-97-8177-01A-11R-2287-07 0 c(NA, NA....
## TCGA-49-6744-11A-01R-1858-07 0 c(NA, NA....
## last_known_disease_status
## TCGA-73-4658-01A-01R-1755-07 not reported
## TCGA-44-2661-11A-01R-1758-07 not reported
## TCGA-55-6986-11A-01R-1949-07 not reported
## TCGA-55-8615-01A-11R-2403-07 not reported
## TCGA-97-8177-01A-11R-2287-07 not reported
## TCGA-49-6744-11A-01R-1858-07 not reported
## tissue_or_organ_of_origin days_to_last_follow_up
## TCGA-73-4658-01A-01R-1755-07 Lower lobe, lung 1600
## TCGA-44-2661-11A-01R-1758-07 Upper lobe, lung 1159
## TCGA-55-6986-11A-01R-1949-07 Lower lobe, lung 3261
## TCGA-55-8615-01A-11R-2403-07 Middle lobe, lung 446
## TCGA-97-8177-01A-11R-2287-07 Lower lobe, lung 499
## TCGA-49-6744-11A-01R-1858-07 Upper lobe, lung 1683
## age_at_diagnosis
## TCGA-73-4658-01A-01R-1755-07 29508
## TCGA-44-2661-11A-01R-1758-07 25313
## TCGA-55-6986-11A-01R-1949-07 NA
## TCGA-55-8615-01A-11R-2403-07 24786
## TCGA-97-8177-01A-11R-2287-07 21648
## TCGA-49-6744-11A-01R-1858-07 23484
## primary_diagnosis
## TCGA-73-4658-01A-01R-1755-07 Adenocarcinoma, NOS
## TCGA-44-2661-11A-01R-1758-07 Adenocarcinoma, NOS
## TCGA-55-6986-11A-01R-1949-07 Bronchiolo-alveolar carcinoma, non-mucinous
## TCGA-55-8615-01A-11R-2403-07 Adenocarcinoma, NOS
## TCGA-97-8177-01A-11R-2287-07 Adenocarcinoma with mixed subtypes
## TCGA-49-6744-11A-01R-1858-07 Adenocarcinoma with mixed subtypes
## prior_malignancy year_of_diagnosis prior_treatment
## TCGA-73-4658-01A-01R-1755-07 yes 2004 No
## TCGA-44-2661-11A-01R-1758-07 yes 2009 No
## TCGA-55-6986-11A-01R-1949-07 no 2004 No
## TCGA-55-8615-01A-11R-2403-07 no 2012 No
## TCGA-97-8177-01A-11R-2287-07 no 2012 No
## TCGA-49-6744-11A-01R-1858-07 no 2010 No
## ajcc_staging_system_edition ajcc_pathologic_t
## TCGA-73-4658-01A-01R-1755-07 6th T2
## TCGA-44-2661-11A-01R-1758-07 6th T1
## TCGA-55-6986-11A-01R-1949-07 6th T2
## TCGA-55-8615-01A-11R-2403-07 7th T3
## TCGA-97-8177-01A-11R-2287-07 7th T2a
## TCGA-49-6744-11A-01R-1858-07 7th T2a
## morphology ajcc_pathologic_n ajcc_pathologic_m
## TCGA-73-4658-01A-01R-1755-07 8140/3 N0 M0
## TCGA-44-2661-11A-01R-1758-07 8140/3 N0 M0
## TCGA-55-6986-11A-01R-1949-07 8252/3 N0 M0
## TCGA-55-8615-01A-11R-2403-07 8140/3 N2 MX
## TCGA-97-8177-01A-11R-2287-07 8255/3 N0 M0
## TCGA-49-6744-11A-01R-1858-07 8255/3 N1 MX
## classification_of_tumor
## TCGA-73-4658-01A-01R-1755-07 not reported
## TCGA-44-2661-11A-01R-1758-07 not reported
## TCGA-55-6986-11A-01R-1949-07 not reported
## TCGA-55-8615-01A-11R-2403-07 not reported
## TCGA-97-8177-01A-11R-2287-07 not reported
## TCGA-49-6744-11A-01R-1858-07 not reported
## diagnosis_id icd_10_code
## TCGA-73-4658-01A-01R-1755-07 6e678430-a27c-5412-b531-49b344cadb05 C34.3
## TCGA-44-2661-11A-01R-1758-07 62584f05-9d54-5926-8d4e-2e3787dd6508 C34.1
## TCGA-55-6986-11A-01R-1949-07 8f6f13a3-c7f2-5028-b188-909c5cc21afe C34.3
## TCGA-55-8615-01A-11R-2403-07 6dbc0170-58ff-560a-800e-781837e97b76 C34.2
## TCGA-97-8177-01A-11R-2287-07 06391f78-eb30-5803-87b4-0a0c5a555399 C34.3
## TCGA-49-6744-11A-01R-1858-07 4a84a8c9-ca7d-5349-b914-fc25cac0c101 C34.1
## site_of_resection_or_biopsy tumor_grade
## TCGA-73-4658-01A-01R-1755-07 Lower lobe, lung Not Reported
## TCGA-44-2661-11A-01R-1758-07 Upper lobe, lung Not Reported
## TCGA-55-6986-11A-01R-1949-07 Lower lobe, lung Not Reported
## TCGA-55-8615-01A-11R-2403-07 Middle lobe, lung Not Reported
## TCGA-97-8177-01A-11R-2287-07 Lower lobe, lung Not Reported
## TCGA-49-6744-11A-01R-1858-07 Upper lobe, lung Not Reported
## progression_or_recurrence cigarettes_per_day
## TCGA-73-4658-01A-01R-1755-07 not reported 1.369863
## TCGA-44-2661-11A-01R-1758-07 not reported NA
## TCGA-55-6986-11A-01R-1949-07 not reported NA
## TCGA-55-8615-01A-11R-2403-07 not reported 3.671233
## TCGA-97-8177-01A-11R-2287-07 not reported NA
## TCGA-49-6744-11A-01R-1858-07 not reported 1.095890
## alcohol_history
## TCGA-73-4658-01A-01R-1755-07 Not Reported
## TCGA-44-2661-11A-01R-1758-07 Not Reported
## TCGA-55-6986-11A-01R-1949-07 Not Reported
## TCGA-55-8615-01A-11R-2403-07 Not Reported
## TCGA-97-8177-01A-11R-2287-07 Not Reported
## TCGA-49-6744-11A-01R-1858-07 Not Reported
## exposure_id years_smoked
## TCGA-73-4658-01A-01R-1755-07 80ac17a4-ed8b-5c3b-a85e-72dfab59fda8 NA
## TCGA-44-2661-11A-01R-1758-07 ccbe3e80-fbcc-51a2-a70f-ec92252879cc NA
## TCGA-55-6986-11A-01R-1949-07 2d802058-cd51-5d81-b522-97cd56503224 NA
## TCGA-55-8615-01A-11R-2403-07 4f7281f1-2b5e-5885-b53d-6306a02d5f19 NA
## TCGA-97-8177-01A-11R-2287-07 e30f73bb-50af-5e71-870b-51379f82ceca NA
## TCGA-49-6744-11A-01R-1858-07 5e2340a9-165e-541d-b36f-01ae61b4d497 NA
## pack_years_smoked race gender
## TCGA-73-4658-01A-01R-1755-07 25 white female
## TCGA-44-2661-11A-01R-1758-07 NA white female
## TCGA-55-6986-11A-01R-1949-07 NA white female
## TCGA-55-8615-01A-11R-2403-07 67 white male
## TCGA-97-8177-01A-11R-2287-07 NA white female
## TCGA-49-6744-11A-01R-1858-07 20 white female
## ethnicity vital_status age_at_index
## TCGA-73-4658-01A-01R-1755-07 not hispanic or latino Dead 80
## TCGA-44-2661-11A-01R-1758-07 not hispanic or latino Alive 69
## TCGA-55-6986-11A-01R-1949-07 not reported Alive 74
## TCGA-55-8615-01A-11R-2403-07 not hispanic or latino Alive 67
## TCGA-97-8177-01A-11R-2287-07 not hispanic or latino Alive 59
## TCGA-49-6744-11A-01R-1858-07 not reported Alive 64
## days_to_birth year_of_birth
## TCGA-73-4658-01A-01R-1755-07 -29508 1924
## TCGA-44-2661-11A-01R-1758-07 -25313 1940
## TCGA-55-6986-11A-01R-1949-07 NA 1930
## TCGA-55-8615-01A-11R-2403-07 -24786 1945
## TCGA-97-8177-01A-11R-2287-07 -21648 1953
## TCGA-49-6744-11A-01R-1858-07 -23484 1946
## demographic_id days_to_death
## TCGA-73-4658-01A-01R-1755-07 21d19606-f883-5be3-adbb-20b41c95627b 1600
## TCGA-44-2661-11A-01R-1758-07 58e2c036-94cc-5ebf-a3e3-1e0e28182c8d NA
## TCGA-55-6986-11A-01R-1949-07 da8cdcae-beae-5745-b739-df6f304f4973 NA
## TCGA-55-8615-01A-11R-2403-07 0948c018-0a80-5822-b435-758d4ba23af5 NA
## TCGA-97-8177-01A-11R-2287-07 7fcd8882-f338-5643-a7a0-184902014715 NA
## TCGA-49-6744-11A-01R-1858-07 d0e66cc5-32bd-5a01-8057-4ac557d5f5a9 NA
## year_of_death bcr_patient_barcode primary_site
## TCGA-73-4658-01A-01R-1755-07 2008 TCGA-73-4658-01A Bronchus....
## TCGA-44-2661-11A-01R-1758-07 NA TCGA-44-2661-11A Bronchus....
## TCGA-55-6986-11A-01R-1949-07 NA TCGA-55-6986-11A Bronchus....
## TCGA-55-8615-01A-11R-2403-07 NA TCGA-55-8615-01A Bronchus....
## TCGA-97-8177-01A-11R-2287-07 NA TCGA-97-8177-01A Bronchus....
## TCGA-49-6744-11A-01R-1858-07 NA TCGA-49-6744-11A Bronchus....
## project_id disease_type name
## TCGA-73-4658-01A-01R-1755-07 TCGA-LUAD Cystic, .... Lung Adenocarcinoma
## TCGA-44-2661-11A-01R-1758-07 TCGA-LUAD Cystic, .... Lung Adenocarcinoma
## TCGA-55-6986-11A-01R-1949-07 TCGA-LUAD Cystic, .... Lung Adenocarcinoma
## TCGA-55-8615-01A-11R-2403-07 TCGA-LUAD Cystic, .... Lung Adenocarcinoma
## TCGA-97-8177-01A-11R-2287-07 TCGA-LUAD Cystic, .... Lung Adenocarcinoma
## TCGA-49-6744-11A-01R-1858-07 TCGA-LUAD Cystic, .... Lung Adenocarcinoma
## releasable released paper_patient paper_Sex
## TCGA-73-4658-01A-01R-1755-07 TRUE TRUE TCGA-73-4658 FEMALE
## TCGA-44-2661-11A-01R-1758-07 TRUE TRUE <NA> <NA>
## TCGA-55-6986-11A-01R-1949-07 TRUE TRUE <NA> <NA>
## TCGA-55-8615-01A-11R-2403-07 TRUE TRUE <NA> <NA>
## TCGA-97-8177-01A-11R-2287-07 TRUE TRUE <NA> <NA>
## TCGA-49-6744-11A-01R-1858-07 TRUE TRUE <NA> <NA>
## paper_Age.at.diagnosis paper_T.stage paper_N.stage
## TCGA-73-4658-01A-01R-1755-07 80 T2 N0
## TCGA-44-2661-11A-01R-1758-07 <NA> <NA> <NA>
## TCGA-55-6986-11A-01R-1949-07 <NA> <NA> <NA>
## TCGA-55-8615-01A-11R-2403-07 <NA> <NA> <NA>
## TCGA-97-8177-01A-11R-2287-07 <NA> <NA> <NA>
## TCGA-49-6744-11A-01R-1858-07 <NA> <NA> <NA>
## paper_Tumor.stage
## TCGA-73-4658-01A-01R-1755-07 Stage IB
## TCGA-44-2661-11A-01R-1758-07 <NA>
## TCGA-55-6986-11A-01R-1949-07 <NA>
## TCGA-55-8615-01A-11R-2403-07 <NA>
## TCGA-97-8177-01A-11R-2287-07 <NA>
## TCGA-49-6744-11A-01R-1858-07 <NA>
## paper_Smoking.Status
## TCGA-73-4658-01A-01R-1755-07 Current reformed smoker for > 15 years
## TCGA-44-2661-11A-01R-1758-07 <NA>
## TCGA-55-6986-11A-01R-1949-07 <NA>
## TCGA-55-8615-01A-11R-2403-07 <NA>
## TCGA-97-8177-01A-11R-2287-07 <NA>
## TCGA-49-6744-11A-01R-1858-07 <NA>
## paper_Survival paper_Transversion.High.Low
## TCGA-73-4658-01A-01R-1755-07 DECEASED High
## TCGA-44-2661-11A-01R-1758-07 <NA> <NA>
## TCGA-55-6986-11A-01R-1949-07 <NA> <NA>
## TCGA-55-8615-01A-11R-2403-07 <NA> <NA>
## TCGA-97-8177-01A-11R-2287-07 <NA> <NA>
## TCGA-49-6744-11A-01R-1858-07 <NA> <NA>
## paper_Nonsilent.Mutations
## TCGA-73-4658-01A-01R-1755-07 277
## TCGA-44-2661-11A-01R-1758-07 NA
## TCGA-55-6986-11A-01R-1949-07 NA
## TCGA-55-8615-01A-11R-2403-07 NA
## TCGA-97-8177-01A-11R-2287-07 NA
## TCGA-49-6744-11A-01R-1858-07 NA
## paper_Nonsilent.Mutations.per.Mb
## TCGA-73-4658-01A-01R-1755-07 7,95
## TCGA-44-2661-11A-01R-1758-07 <NA>
## TCGA-55-6986-11A-01R-1949-07 <NA>
## TCGA-55-8615-01A-11R-2403-07 <NA>
## TCGA-97-8177-01A-11R-2287-07 <NA>
## TCGA-49-6744-11A-01R-1858-07 <NA>
## paper_Oncogene.Negative.or.Positive.Groups
## TCGA-73-4658-01A-01R-1755-07 Oncogene Negative
## TCGA-44-2661-11A-01R-1758-07 <NA>
## TCGA-55-6986-11A-01R-1949-07 <NA>
## TCGA-55-8615-01A-11R-2403-07 <NA>
## TCGA-97-8177-01A-11R-2287-07 <NA>
## TCGA-49-6744-11A-01R-1858-07 <NA>
## paper_Fusions paper_expression_subtype
## TCGA-73-4658-01A-01R-1755-07 prox.-inflam
## TCGA-44-2661-11A-01R-1758-07 <NA> <NA>
## TCGA-55-6986-11A-01R-1949-07 <NA> <NA>
## TCGA-55-8615-01A-11R-2403-07 <NA> <NA>
## TCGA-97-8177-01A-11R-2287-07 <NA> <NA>
## TCGA-49-6744-11A-01R-1858-07 <NA> <NA>
## paper_chromosome.affected.by.chromothripsis
## TCGA-73-4658-01A-01R-1755-07 <NA>
## TCGA-44-2661-11A-01R-1758-07 <NA>
## TCGA-55-6986-11A-01R-1949-07 <NA>
## TCGA-55-8615-01A-11R-2403-07 <NA>
## TCGA-97-8177-01A-11R-2287-07 <NA>
## TCGA-49-6744-11A-01R-1858-07 <NA>
## paper_iCluster.Group
## TCGA-73-4658-01A-01R-1755-07 4
## TCGA-44-2661-11A-01R-1758-07 NA
## TCGA-55-6986-11A-01R-1949-07 NA
## TCGA-55-8615-01A-11R-2403-07 NA
## TCGA-97-8177-01A-11R-2287-07 NA
## TCGA-49-6744-11A-01R-1858-07 NA
## paper_CIMP.methylation.signature.
## TCGA-73-4658-01A-01R-1755-07 high
## TCGA-44-2661-11A-01R-1758-07 <NA>
## TCGA-55-6986-11A-01R-1949-07 <NA>
## TCGA-55-8615-01A-11R-2403-07 <NA>
## TCGA-97-8177-01A-11R-2287-07 <NA>
## TCGA-49-6744-11A-01R-1858-07 <NA>
## paper_MTOR.mechanism.of.mTOR.pathway.activation
## TCGA-73-4658-01A-01R-1755-07 unaligned
## TCGA-44-2661-11A-01R-1758-07 <NA>
## TCGA-55-6986-11A-01R-1949-07 <NA>
## TCGA-55-8615-01A-11R-2403-07 <NA>
## TCGA-97-8177-01A-11R-2287-07 <NA>
## TCGA-49-6744-11A-01R-1858-07 <NA>
## paper_Ploidy.ABSOLUTE.calls
## TCGA-73-4658-01A-01R-1755-07 1,96
## TCGA-44-2661-11A-01R-1758-07 <NA>
## TCGA-55-6986-11A-01R-1949-07 <NA>
## TCGA-55-8615-01A-11R-2403-07 <NA>
## TCGA-97-8177-01A-11R-2287-07 <NA>
## TCGA-49-6744-11A-01R-1858-07 <NA>
## paper_Purity.ABSOLUTE.calls
## TCGA-73-4658-01A-01R-1755-07 0,35
## TCGA-44-2661-11A-01R-1758-07 <NA>
## TCGA-55-6986-11A-01R-1949-07 <NA>
## TCGA-55-8615-01A-11R-2403-07 <NA>
## TCGA-97-8177-01A-11R-2287-07 <NA>
## TCGA-49-6744-11A-01R-1858-07 <NA>
table(sample_info[["sample_type"]]) # Count samples per sample type (tumor vs. normal)
##
## Primary Tumor Solid Tissue Normal
## 539 59
# Metadata rows of the normal-tissue samples only
sample_info_normal <- sample_info[sample_info[["definition"]] == "Solid Tissue Normal", , drop = FALSE]
# Primary tumor samples from patients that also have a normal sample
sample_info_tumor <- filter(
  sample_info,
  patient %in% sample_info_normal$patient,
  definition == "Primary solid Tumor"
)
# The tumor table has more rows than the normal table, so collect the
# patients that appear more than once among the tumor samples
sample_info_tumor_dups <- sample_info_tumor %>%
  group_by(patient) %>%
  filter(n() >= 2L) %>%
  ungroup()
unique(sample_info_tumor_dups[["patient"]]) # There are 6 patients with multiple tumor samples
## [1] "TCGA-44-6147" "TCGA-44-2662" "TCGA-44-5645" "TCGA-44-6146" "TCGA-44-2668"
## [6] "TCGA-44-2665"
# Among the patients with several tumor samples, the replicates differ in FFPE
# status; per the original analysis notes, these 6 patients are the only ones
# in the matched group that have FFPE samples available.
# filter() is used instead of base logical indexing so that an NA in is_ffpe
# cannot introduce all-NA rows.
sample_info_tumor_dups_FFPE <- sample_info_tumor_dups %>%
  filter(is_ffpe)
# Provisional decision kept from the original analysis: retain the FFPE sample
# of each duplicated patient and drop the non-FFPE replicates.
# NOTE(review): confirm that keeping the FFPE replicates is the intended choice.
sample_info_tumor_dups_non_FFPE <- sample_info_tumor_dups %>%
  filter(!is_ffpe)
# Remove the non-FFPE replicates from the main tumor sample info
sample_info_tumor <- sample_info_tumor %>%
  filter(!barcode %in% sample_info_tumor_dups_non_FFPE$barcode)
# Drop every normal sample whose patient has no remaining tumor sample.
# The original hard-coded the single unmatched patient (TCGA-44-6144); deriving
# the set from the data keeps the tables consistent if the input changes.
sample_info_normal <- sample_info_normal %>%
  filter(patient %in% sample_info_tumor$patient)
# Build the matched tumor-normal table. Sorting by patient and then by
# sample_type_id in one call puts the tumor row ("01") directly before the
# normal row ("11") of the same patient without relying on sort stability.
sample_info_matched_T_NM <- rbind(sample_info_tumor, sample_info_normal) %>%
  dplyr::select(-treatments) %>% # list column that carries no information
  arrange(patient, sample_type_id)
# Downstream steps assume strictly alternating tumor/normal rows of the same
# patient, so fail loudly here instead of silently mis-pairing samples.
local({
  patient_id <- as.character(sample_info_matched_T_NM$patient)
  tissue <- as.character(sample_info_matched_T_NM$definition)
  tumor_row <- seq_along(patient_id) %% 2 == 1
  pairs_ok <- length(patient_id) %% 2 == 0 &&
    all(tissue[tumor_row] == "Primary solid Tumor") &&
    all(tissue[!tumor_row] == "Solid Tissue Normal") &&
    identical(patient_id[tumor_row], patient_id[!tumor_row])
  if (!isTRUE(pairs_ok)) {
    stop(
      "sample_info_matched_T_NM is not a strictly alternating list of ",
      "tumor/normal pairs (one tumor and one normal sample per patient).",
      call. = FALSE
    )
  }
})
## Restrict the counts table to the matched tumor-normal samples ##
# Barcodes of the matched samples, in the row order of the matched sample table
sample_barcodes <- as.character(sample_info_matched_T_NM$barcode)
# Keep exactly those count columns, in that same order
counts_matched_T_NM <- dplyr::select(counts, all_of(sample_barcodes))
# Label the columns by sample ID instead of the full barcode
colnames(counts_matched_T_NM) <- sample_info_matched_T_NM$sample
library(dplyr)
library(edgeR)
## Warning: package 'edgeR' was built under R version 4.3.2
# Distribution checks on the matched counts table.
# The log-scale histograms use log2(x + 1): log2(0) is -Inf and hist() discards
# non-finite values, so a plain log2() silently hides every zero count.
hist(as.matrix(counts_matched_T_NM)) # whoa
# Original note (made on log2 without the pseudocount): still not normal at all
hist(log2(as.matrix(counts_matched_T_NM) + 1))
# Tumor samples only (odd-numbered columns of the alternating tumor/normal table)
counts_matched_T <- counts_matched_T_NM %>%
  dplyr::select(seq(1, ncol(counts_matched_T_NM), by = 2))
# Original note: equally bad distribution, why is it the same though??
hist(log2(as.matrix(counts_matched_T) + 1))
# Normal samples only (even-numbered columns)
counts_matched_NM <- counts_matched_T_NM %>%
  dplyr::select(seq(2, ncol(counts_matched_T_NM), by = 2))
# Original note: equally bad distribution, why is it the same though????
hist(log2(as.matrix(counts_matched_NM) + 1))
boxplot(counts_matched_T_NM) # Boxplots for all counts looks crazy
# boxplot(counts_matched_T) # Boxplots for just tumors looks crazy
# boxplot(counts_matched_NM) # Boxplots for just normals looks crazy
## MDS/PCA plot of the matched counts to check for tumor-normal separation
# NOTE(review): plotMDS documents its input as log-expression values, but the
# raw counts table is passed here -- confirm that this is intended.
# Colour codes 1, 2, 1, 2, ... following the alternating column order
colz <- rep(c(1, 2), times = length(counts_matched_T_NM) / 2)
plotMDS(
  counts_matched_T_NM,
  gene.selection = "common",
  main = "PCA for TCGA-LUAD expression",
  col = colz,
  pch = 1
)
# Original note: separate but not very good separation, 1 definite outlier.
# Same plot again without `pch`, so that the sample names are drawn and the
# outlier can be identified
plotMDS(
  counts_matched_T_NM,
  gene.selection = "common",
  main = "PCA for TCGA-LUAD expression",
  col = colz
)
# Checking out this outlier, TCGA-38-4626-01A
hist(log2(counts_matched_T_NM$`TCGA-38-4626-01A`)) # Not obvious why it's an outlier, but must somehow be really normal-like?
## Making a dendrogram to see if the same outliers are found
sample_dist <- dist(t(counts_matched_T_NM)) # Transpose the matrix to calculate distances between samples
hc <- hclust(sample_dist) #Perform hierarchical clustering
plot(hc, main = "Dendrogram of Samples", xlab = "", sub = "", cex = 0.8) # Plot the dendrogram
# Remove the 1 most obvious outlier and its pair:
# TCGA-38-4626-01A, TCGA-38-4626-11A
# Both members of the pair are dropped so that the columns keep alternating tumor, normal.
counts_matched_T_NM <- counts_matched_T_NM %>% dplyr::select(-c("TCGA-38-4626-01A","TCGA-38-4626-11A"))
# counts_matched_T_NM <- counts_matched_T_NM %>% dplyr::select(-c("TCGA.38.4626.01A","TCGA.38.4626.11A"))
# Version after reading it in
# (the commented line above uses "." instead of "-" in the sample names, for a table re-read from disk)
## PCA to check for tumor-normal separation with outlier removed
colz2 <- as.numeric(as.factor(rep(c(0,1), length(counts_matched_T_NM)/2))) # Get color values from group (recomputed for the reduced column count)
plotMDS(counts_matched_T_NM,
gene.selection = "common",
main = "PCA for TCGA-LUAD expression after outlier removal",
col = colz2,
pch = 1
)
## Saving this version of the T-NM matched counts
#write.table(counts_matched_T_NM, "../2_Outputs/3_Tumor_expression/TCGA_LUAD_counts_matched_T_NM_20241125.txt")
The matrices have messy boxplots and histograms, but since I am using the Wilcoxon signed-rank test, which does not require normally distributed data, I have decided to go with this raw counts matrix for now.
DGE/DEG analysis is based on a recent paper: Li et al. Genome Biology (2022) 23:79 Source code: https://github.com/xihuimeijing/DEGs_Analysis_FDR/blob/main/scripts/DEGs.R Accessed 2023/08/26
Tutorial: https://rpubs.com/LiYumei/806213 Accessed 2023/08/31
Unlike the tutorial, here I perform a signed-rank test rather than a rank-sum test, as the samples are not independent (they are matched tumor and normal samples).
library(edgeR)
# Make DGEList
readCount <- counts_matched_T_NM
# Group labels: columns are alternating tumor-normal pairs, so 1 = tumor, 2 = normal
conditions <- factor(rep(c(1, 2), ncol(counts_matched_T_NM) / 2))
y <- DGEList(counts = readCount, group = conditions)
# Filter out genes with very low counts using the filterByExpr function (default settings)
keep <- filterByExpr(y)
y <- y[keep, , keep.lib.sizes = FALSE]
# Perform TMM normalization and convert to CPM (Counts Per Million)
y <- calcNormFactors(y, method = "TMM")
count_norm <- cpm(y)
count_norm <- as.data.frame(count_norm)
# Unlike the tutorial, a paired (signed-rank) test is used instead of a rank-sum test.
# The tumor and normal values are passed as separate x / y vectors: the formula
# interface with `paired = TRUE` is rejected by the formula method from R 4.4.0 onwards.
# Pairing relies on the i-th tumor column and the i-th normal column belonging to
# the same patient (alternating column layout).
is_tumor <- conditions == 1
is_normal <- conditions == 2
pvalues <- vapply(seq_len(nrow(count_norm)), function(i) {
  gene_cpm <- unlist(count_norm[i, ], use.names = FALSE)
  wilcox.test(gene_cpm[is_tumor], gene_cpm[is_normal], paired = TRUE)$p.value
}, numeric(1))
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
# False discovery rate (Benjamini-Hochberg) corrected p-values
fdr <- p.adjust(pvalues, method = "fdr")
# log2 fold change: mean tumor CPM over mean normal CPM
# (odd columns = tumor, even columns = normal)
tumor_indexes <- seq(1, ncol(count_norm), by = 2)
normal_indexes <- seq(2, ncol(count_norm), by = 2)
tumor_values <- count_norm[, tumor_indexes]
normal_values <- count_norm[, normal_indexes]
foldChanges <- log2(rowMeans(tumor_values, na.rm = TRUE) / rowMeans(normal_values, na.rm = TRUE))
# Output results to a unified table, one row per gene (row names = Ensembl IDs)
DGE_LUAD_T_NM_signed_rank <- data.frame(log2foldChange = foldChanges, pValues = pvalues, FDR = fdr)
rownames(DGE_LUAD_T_NM_signed_rank) <- rownames(count_norm)
DGE_LUAD_T_NM_signed_rank <- na.omit(DGE_LUAD_T_NM_signed_rank)
### Replace ensembl IDs with gene names
# Gene annotation rows for the tested genes, sorted by Ensembl ID (kept for inspection)
gene_info_DEGs <- gene_info %>%
  filter(gene_id %in% rownames(DGE_LUAD_T_NM_signed_rank)) %>%
  arrange(., gene_id)
# Sort the results by Ensembl ID
DGE_LUAD_T_NM_signed_rank <- DGE_LUAD_T_NM_signed_rank %>% arrange(., rownames(.))
# Look each Ensembl ID up explicitly instead of relying on two independently sorted
# tables lining up row-for-row: a missing or duplicated ID in gene_info would
# otherwise shift or break the assignment. Unmatched IDs get NA as gene name.
DGE_LUAD_T_NM_signed_rank$Gene <- gene_info$gene_name[
  match(rownames(DGE_LUAD_T_NM_signed_rank), gene_info$gene_id)
]
rownames(DGE_LUAD_T_NM_signed_rank) <- NULL
### Filter to genes below FDR < 0.05 ###
fdrThres <- 0.05
DGE_LUAD_T_NM_signed_rank_sig <- DGE_LUAD_T_NM_signed_rank[DGE_LUAD_T_NM_signed_rank$FDR < fdrThres, ]
nrow(DGE_LUAD_T_NM_signed_rank_sig) # 13465 (2024/11/07)
## [1] 13465
# Volcano plot of the signed-rank results (all tested genes, not only the significant ones).
# The y axis uses the FDR column, so pCutoff below is an FDR threshold.
log2FC_cutoff2 <- 1
v2 <- EnhancedVolcano::EnhancedVolcano(
toptable = DGE_LUAD_T_NM_signed_rank,
lab = DGE_LUAD_T_NM_signed_rank$Gene,
x = "log2foldChange",
y = "FDR",
# pCutoffCol = 'min_smoothed_fdr',
xlab = "log2FC",
ylab = "-log10(FDR)",
title = "TE DEGs",
subtitle = paste0("log2FC cutoff: ", log2FC_cutoff2),
# Caption counts genes that pass both FDR < 0.05 (already applied in *_sig) and |log2FC| > cutoff
caption = paste0("Total = ", nrow(DGE_LUAD_T_NM_signed_rank_sig[abs(DGE_LUAD_T_NM_signed_rank_sig$log2foldChange)>log2FC_cutoff2,]), " significant DEGs above log2FC cutoff"),
col = c("grey30", "mediumpurple2", "royalblue", "orange2"),
legendPosition = "bottom",
labSize = 3,
max.overlaps = 10,
drawConnectors = TRUE,
arrowheads = FALSE,
pCutoff = 0.05,
FCcutoff = log2FC_cutoff2,
gridlines.minor = FALSE,
gridlines.major = FALSE,
#xlim = c(-3, 6)
# y axis is limited to -log10(FDR) between 0 and 10
ylim = c(0,10)
)
v2
## Warning: ggrepel: 4772 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
I have been told that the Wilcoxon signed-rank test may be much more suitable for assessing a handful of genes rather than whole-transcriptome analysis. DESeq2 is more typically used for the latter, despite the finding of the publication listed above. I will compare the results using DESeq2.
library(DESeq2)
## Warning: package 'DESeq2' was built under R version 4.3.3
library(apeglm)
readCount <- as.matrix(counts_matched_T_NM)
# Removing the outlier samples from the sample info and setting rownames as sample names.
# `%in%` is used on purpose: `sample != c(a, b)` recycles the two names down the
# column and compares element-by-element, so it only drops the pair when the two
# samples happen to sit at an odd and an even row position respectively.
sample_info_matched_T_NM <- sample_info_matched_T_NM %>%
  dplyr::filter(!sample %in% c("TCGA-38-4626-01A", "TCGA-38-4626-11A"))
rownames(sample_info_matched_T_NM) <- sample_info_matched_T_NM$sample
# Checking the sample names are in the same order (DESeq2 matches columns to rows by position)
all(colnames(readCount)==rownames(sample_info_matched_T_NM))
## [1] TRUE
# Preparing and performing DESeq
# NOTE(review): the samples are matched tumor-normal pairs, but this design models
# tissue type only. A paired design (e.g. ~ patient + definition) may be more
# appropriate - confirm before relying on these results.
dds <- DESeqDataSetFromMatrix(countData = readCount,
                              colData = sample_info_matched_T_NM,
                              design = ~ definition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
# Keep genes with at least 10 counts in at least as many samples as the smallest
# group. The group size is computed from the data (57 for the 114 matched samples)
# so that the filter stays correct if the sample set changes.
smallest_group_size <- min(table(dds$definition))
keep <- rowSums(counts(dds) >= 10) >= smallest_group_size
dds <- dds[keep, ]
# Set the reference level as the normal tissue, so fold changes are tumor vs normal
dds$definition <- relevel(dds$definition, ref = "Solid Tissue Normal")
# Perform differential expression analysis
dds <- DESeq(dds)
## estimating size factors
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
## fitting model and testing
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## -- replacing outliers and refitting for 754 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
resultsNames(dds) # lists the coefficients; the second one is the tumor-vs-normal effect used below
## [1] "Intercept"
## [2] "definition_Primary.solid.Tumor_vs_Solid.Tissue.Normal"
# Unshrunken alternative, kept for reference:
# res <- results(dds,
# name="definition_Primary.solid.Tumor_vs_Solid.Tissue.Normal"
# )
# or to shrink log fold changes association with condition:
# (apeglm shrinkage of the tumor-vs-normal coefficient; `res` holds the shrunken log2 fold changes)
res <- lfcShrink(dds,
coef="definition_Primary.solid.Tumor_vs_Solid.Tissue.Normal",
type="apeglm")
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
summary(res) # note: the summary below is reported at adjusted p-value < 0.1, not the 0.05 used later
##
## out of 19062 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 9099, 48%
## LFC < 0 (down) : 5225, 27%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 10)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Collect the DESeq2 results in a plain data frame keyed by Ensembl ID.
# Row names are set explicitly from `res`: cbind() of bare columns only carries
# gene IDs if the first vector happens to have names attached.
res_table <- data.frame(
  log2FC = res$log2FoldChange,
  FDR = res$padj,
  base_mean = res$baseMean,
  row.names = rownames(res)
)
### Replace ensembl IDs with gene names
res_table <- res_table %>% arrange(., rownames(.))
# Annotation rows for the tested genes, sorted by Ensembl ID (kept for inspection)
gene_info_sorted <- gene_info %>%
  arrange(., gene_id) %>%
  filter(gene_id %in% rownames(res_table))
# Look each Ensembl ID up explicitly rather than assuming the two sorted tables
# line up row-for-row; IDs missing from gene_info get NA.
res_table$gene <- gene_info$gene_name[match(rownames(res_table), gene_info$gene_id)]
plotMA(res,
       ylim = c(-10, 15), # Adjust y-axis limits for clarity
       alpha = 0.05) # Highlights significant genes (default: FDR ≤ 0.1)
FDR_min <- 0.05
# Significant genes only: which() skips rows whose FDR is NA, and any row that
# still has a missing value in another column is dropped afterwards
res_table_sig <- res_table[which(res_table$FDR <= FDR_min), ]
res_table_sig <- res_table_sig[complete.cases(res_table_sig), ]
# Number of significant genes before any fold-change filter
nrow(res_table_sig)
## [1] 13493
hist(res_table_sig$log2FC, breaks = 100) # Looks pretty normally distributed, and cutoff of 1 would remove a lot
# How many genes would survive an absolute log2FC cutoff of 1?
sum(abs(res_table_sig$log2FC) > 1) # log2FC cutoff of 1 would be pretty good
## [1] 4542
res_table_sig_cutoff1 <- subset(res_table_sig, abs(log2FC) > 1)
nrow(res_table_sig_cutoff1) # 4542 is a solid number
## [1] 4542
# Descriptive aliases used by the downstream comparison steps
DGE_LUAD_T_NM_DESeq2_sig_cutoff1 <- res_table_sig_cutoff1
DGE_LUAD_T_NM_DESeq2_sig <- res_table_sig
# Volcano plot of the DESeq2 results (all tested genes; shrunken log2FC on x, FDR on y).
log2FC_cutoff <- 1
FDR_cutoff<- 0.05
# NOTE(review): the name `v3` is assigned again by the methylation volcano plot later in this script.
v3 <- EnhancedVolcano::EnhancedVolcano(
toptable = res_table,
lab = res_table$gene,
x = "log2FC",
y = "FDR",
# pCutoffCol = 'min_smoothed_fdr',
xlab = "log2FC",
ylab = "-log10(FDR)",
title = "TE DEGs",
subtitle = paste0("log2FC cutoff: ", log2FC_cutoff),
# Caption counts genes passing both the FDR filter (already applied in res_table_sig) and the log2FC cutoff
caption = paste0("Total = ", nrow(res_table_sig[abs(res_table_sig$log2FC)>log2FC_cutoff,]), " significant DEGs above log2FC cutoff"),
col = c("grey30", "mediumpurple2", "royalblue", "orange2"),
legendPosition = "bottom",
labSize = 3,
max.overlaps = 10,
drawConnectors = TRUE,
arrowheads = FALSE,
pCutoff = FDR_cutoff,
FCcutoff = log2FC_cutoff,
gridlines.minor = FALSE,
gridlines.major = FALSE,
#xlim = c(-3, 6)
# y axis is limited to -log10(FDR) between 0 and 10
ylim = c(0,10)
)
v3
## Warning: ggrepel: 1308 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## 2.4 Checking the outputs of the two methods
library(ggvenn)
## Loading required package: scales
# Compare the significant gene lists from the two methods with a Venn diagram.
# NOTE(review): this replaces the in-memory signed-rank result with a table saved on 2024-11-07;
# confirm the file on disk matches the current analysis (and that read.table's default
# header/separator handling matches how the file was written).
DGE_LUAD_T_NM_signed_rank_sig <- read.table("../2_Outputs/4_Tumor_DEGs/DGE_LUAD_T_NM_signed_rank_sig_20241107.txt")
# Define the gene lists
DESeq2_list <- DGE_LUAD_T_NM_DESeq2_sig$gene
signed_rank_list <- DGE_LUAD_T_NM_signed_rank_sig$Gene
library(ggvenn)
# Define the gene lists in a named list
# (the list names become the set labels in the diagram)
gene_lists <- list(
"DESeq2_list" = DESeq2_list,
"signed_rank_list" = signed_rank_list
)
# Create the Venn diagram
ggvenn::ggvenn(gene_lists, fill_color = c("blue", "red"))
## ~80% agreement between the lists is really good. Since there is better justification for using DESeq2, I should probably use that one.
# Change date suffix as appropriate if modifications are made
#write.table(DGE_LUAD_T_NM_signed_rank_sig, "../2_Outputs/DGE_LUAD_T_NM_signed_rank_sig_20241107.txt", sep = '\t')
# write.table(res_table, "../2_Outputs/4_Tumor_DEGs/DGE_LUAD_T_NM_DSeq2_20241127.txt", sep = '\t')
# write.table(res_table_sig_cutoff1, "../2_Outputs/4_Tumor_DEGs/DGE_LUAD_T_NM_DSeq2_sig_cutoff1_20241127.txt", sep = '\t')
# write.table(res_table_sig, "../2_Outputs/4_Tumor_DEGs/DGE_LUAD_T_NM_DSeq2_sig_20241127.txt", sep = '\t')
I downloaded this level 3 methylation 450k data from cBioPortal, from TCGA Lung Adenocarcinoma (Firehose Legacy) https://www.cbioportal.org/study/summary?id=luad_tcga (Accessed 2023/08/29) Note that this provides gene information but not probe information. I did a lot of work trying to do the analysis starting from probe level information, but ultimately decided to stick with this.
# Read the gene-level HM450 beta-value tables for tumor and normal samples
data_methylation_hm450_tumor <- read.table("../../Former_Smokers_Aim_2/1_TCGA_LUAD_multiomics/0_Unpaired_input_tables/data_methylation_hm450.txt", header=TRUE, fill=TRUE)
data_methylation_hm450_normal <- read.table("../../Former_Smokers_Aim_2/1_TCGA_LUAD_multiomics/0_Unpaired_input_tables/data_methylation_hm450_normals.txt", header=TRUE, fill=TRUE)
allIDs_tumor <- colnames(data_methylation_hm450_tumor)
allIDs_normal <- colnames(data_methylation_hm450_normal)
# Listing IDs of tumors that have matched normals by changing the tissue ID to the
# "tumor" identifier, "01", for matching purposes.
# The pattern is anchored to the literal ".11" suffix: an unanchored ".11" is a
# regex in which "." matches any character, so it could also rewrite digits
# inside the patient part of a barcode. Annotation column names are left unchanged.
IDs_tumor_with_matches <- sub("\\.11$", ".01", allIDs_normal)
# Table of the methylation data for tumor samples that have matching normal data
# (plus the annotation columns), in the column order of the normal table.
data_methylation_hm450_tumor_with_matches <- data_methylation_hm450_tumor %>%
  dplyr::select(any_of(IDs_tumor_with_matches))
# Table of the methylation data for normal samples that have matching tumor data.
# Normals without a tumor counterpart are dropped; for this dataset these are
# TCGA.44.2655.11, TCGA.44.2659.11 and TCGA.44.2662.11.
data_methylation_hm450_normal_with_matches <- data_methylation_hm450_normal %>%
  dplyr::select(all_of(sub("\\.01$", ".11", colnames(data_methylation_hm450_tumor_with_matches))))
# Both tables must have the same number of columns for the interleaving below
stopifnot(
  ncol(data_methylation_hm450_tumor_with_matches) ==
    ncol(data_methylation_hm450_normal_with_matches)
)
# Make a combined table of matched tumor-normal samples: column k of the tumor
# table is followed directly by column k of the normal table.
n_matched_cols <- ncol(data_methylation_hm450_tumor_with_matches)
data_methylation_hm450_tumor_normal_matched <- cbind(
  data_methylation_hm450_tumor_with_matches,
  data_methylation_hm450_normal_with_matches
)[order(c(seq_len(n_matched_cols), seq_len(n_matched_cols)))]
# Remove duplicate gene ID column and the entrez ID columns
# (after interleaving, columns 1-4 are the two gene-symbol and two Entrez columns)
data_methylation_hm450_tumor_normal_matched <- data_methylation_hm450_tumor_normal_matched[, -c(1, 3, 4)]
# I want to make the gene names into row names, but I cannot because some gene names appear twice.
# So, I will rename them with indexes _1 and _2 and figure out why they appeared twice later.
# NOTE(review): the list of duplicated symbols below is hard-coded, and the suffixing step assumes
# each of them occurs exactly twice - re-check if the input file changes.
#Checking rows of the gene names with duplicates:
checking_dups <- data_methylation_hm450_tumor_normal_matched[data_methylation_hm450_tumor_normal_matched$Hugo_Symbol.1 %in% c("AGER", "CX3CR1", "F2R", "GADL1", "GCOM1", "KLK10", "PALM2AKAP2", "QSOX1", "RCC1"),]
# I see that these are not identical rows - the methylation values are different. So, I will go ahead and add indexes.
checking_dups <- checking_dups[order(checking_dups$Hugo_Symbol.1),]#Sort by gene name so the two copies of each gene are adjacent
checking_dups <- cbind(rownames(checking_dups), checking_dups[,1]) #Two-column character matrix: original row name, gene name
checking_dups[,2] <- paste(checking_dups[,2],1:2,sep="_") # Add a suffix to the gene names (1:2 is recycled down the sorted rows)
#Replace the gene names in the T-NM matched file with the suffixed gene names
# (rows are addressed by their original row names, column 1 is the gene-symbol column)
data_methylation_hm450_tumor_normal_matched[checking_dups[,1],1] <- checking_dups[,2]
#Now that there are no longer duplicates, make the gene names column into the row names and remove the gene names column.
rownames(data_methylation_hm450_tumor_normal_matched) <- data_methylation_hm450_tumor_normal_matched[,1]
# Keep only the 58 sample columns (columns 2 to 59); the column range is hard-coded
data_methylation_hm450_tumor_normal_matched <- data_methylation_hm450_tumor_normal_matched[,2:59]
# QC of the methylation values. Columns 1-2 of the unpaired tables are annotation
# columns, so the value columns start at column 3.
hist(as.matrix(data_methylation_hm450_tumor[3:length(data_methylation_hm450_tumor)]))
# Range of the tumor values. The tables contain missing values, so na.rm = TRUE
# is required; without it both calls simply return NA.
max(data_methylation_hm450_tumor[3:length(data_methylation_hm450_tumor)], na.rm = TRUE)
min(data_methylation_hm450_tumor[3:length(data_methylation_hm450_tumor)], na.rm = TRUE)
boxplot(data_methylation_hm450_tumor[3:length(data_methylation_hm450_tumor)])
hist(as.matrix(data_methylation_hm450_normal[3:length(data_methylation_hm450_normal)]))
boxplot(data_methylation_hm450_normal[3:length(data_methylation_hm450_normal)])
# Same checks on the matched tumor-normal table (sample columns only)
hist(as.matrix(data_methylation_hm450_tumor_normal_matched))
boxplot(data_methylation_hm450_tumor_normal_matched)
# This is definitely not a normal distribution, but the wilcoxon signed-rank test does not assume a normal distribution. However, maybe this could indicate an issue with the original files? Edit: Beta values normally have a bimodal distribution so it's not really unusual
# Alias with a shorter name for the matched beta-value table
methyl_beta <- data_methylation_hm450_tumor_normal_matched
# Logit-transform (base 2) the beta values into M values.
# Beta values of exactly 0 or 1 map to -Inf / +Inf.
methyl_M <- log2(methyl_beta / (1 - methyl_beta))
# Remove a tumor-normal pair if either member has an NA value.
# Used before the Wilcoxon signed-rank test.
#
# my_data: data frame (or matrix) whose columns alternate tumor, normal, so that
#          columns (1, 2), (3, 4), ... each form one pair.
# Returns: my_data restricted to the pairs in which neither column contains an NA,
#          in the original column order. If no pair is complete the result has
#          zero columns. Stops with an error if the number of columns is odd.
remove_NA_pairs <- function(my_data) {
  n_cols <- ncol(my_data)
  if (n_cols %% 2 != 0) {
    stop("remove_NA_pairs() expects an even number of columns (tumor, normal pairs)",
         call. = FALSE)
  }
  # Flag every column that contains at least one NA
  col_has_na <- colSums(is.na(my_data)) > 0
  # Index of the tumor column of each pair; the normal column is the next one
  tumor_idx <- seq_len(n_cols / 2) * 2 - 1
  pair_complete <- !col_has_na[tumor_idx] & !col_has_na[tumor_idx + 1]
  # Interleave the kept tumor and normal indices back into column order
  valid_columns <- as.vector(rbind(tumor_idx[pair_complete], tumor_idx[pair_complete] + 1))
  # drop = FALSE keeps the result two-dimensional whatever the input type
  my_data[, valid_columns, drop = FALSE]
}
### Wilcoxon signed-rank test ###
# Run the paired Wilcoxon signed-rank test for each gene on the M values.
# For each gene, tumor-normal pairs with an NA in either member are removed first;
# the remaining columns still alternate tumor, normal.
# The tumor and normal values are passed as two equally long vectors, one entry
# per pair. Building a long table with a group label twice as long as the data
# must be avoided: data.frame() silently recycles the values, so every pair is
# counted twice and the p-values come out too small.
# NOTE: p-values (and all counts derived from them) must be regenerated after
# any change to this step.
pvalues <- vapply(seq_len(nrow(methyl_M)), function(i) {
  # Remove values from tumor-normal pairs if either of them is NA
  M_values <- remove_NA_pairs(methyl_M[i, ])
  n_kept <- ncol(M_values)
  # No complete pair left for this gene: nothing to test
  if (n_kept == 0) {
    return(NA_real_)
  }
  M_vector <- unlist(M_values, use.names = FALSE)
  tumor_M <- M_vector[seq(1, n_kept, by = 2)]
  normal_M <- M_vector[seq(2, n_kept, by = 2)]
  # paired = TRUE makes this a signed-rank test (x/y interface; the formula
  # interface no longer accepts `paired` from R 4.4.0 onwards)
  wilcox.test(tumor_M, normal_M, paired = TRUE)$p.value
}, numeric(1))
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
#Note that if you don't use the remove_NA_pairs filtering step and instead use na.action=na.pass, you also don't get any error messages, and it's possible that this also effectively skips over pairs with NAs. I just couldn't find a detailed explanation of how na.pass works in this case, so I filtered first using my own remove_NA_pairs function.
# FDR-corrected p-values across all genes
fdr <- p.adjust(pvalues, method = "fdr")
# Column positions of the tumor (odd) and normal (even) samples
normal_indexes <- seq(2, ncol(methyl_M), by = 2)
tumor_indexes <- seq(1, ncol(methyl_M), by = 2)
# log2 fold change is computed on the beta values (not the M values):
# mean tumor beta over mean normal beta, ignoring missing values
normal_values <- methyl_beta[, normal_indexes]
tumor_values <- methyl_beta[, tumor_indexes]
foldChanges <- log2(
  rowMeans(tumor_values, na.rm = TRUE) / rowMeans(normal_values, na.rm = TRUE)
)
# Assemble one results table with the gene name as a regular column
DMeth_LUAD_T_NM_hm450 <- data.frame(
  log2foldChange = foldChanges,
  pValues = pvalues,
  FDR = fdr,
  Gene = rownames(methyl_beta),
  stringsAsFactors = FALSE
)
rownames(DMeth_LUAD_T_NM_hm450) <- NULL
# Drop genes for which any of the statistics is missing
DMeth_LUAD_T_NM_hm450 <- na.omit(DMeth_LUAD_T_NM_hm450)
# Resolve the genes that were given a "_1" / "_2" suffix earlier: keep the most
# significant copy of each gene and list the other copy for removal.
library(stringr)
DMeth_LUAD_T_NM_hm450_dups_to_rm <- DMeth_LUAD_T_NM_hm450 %>%
  # Only names ending in the added suffix, not any symbol that contains "_"
  filter(str_detect(Gene, "_[12]$")) %>%
  mutate(base_gene = str_remove(Gene, "_[12]$")) %>%
  # Within each gene the lowest FDR comes first; ties keep the original row order
  arrange(base_gene, FDR) %>%
  mutate(pair_id = match(base_gene, unique(base_gene))) %>% # Pair ID column
  group_by(pair_id) %>%
  # Everything except the first (most significant) copy is marked for removal.
  # On an exact FDR tie one copy is still kept, and a copy whose partner was
  # already dropped by na.omit() is kept as well.
  slice(-1) %>%
  ungroup() %>%
  dplyr::select(-base_gene)
# Remove the less significant duplicate genes
DMeth_LUAD_T_NM_hm450 <- DMeth_LUAD_T_NM_hm450 %>%
  filter(!(Gene %in% DMeth_LUAD_T_NM_hm450_dups_to_rm$Gene))
# Remove the "_n" suffixes from the remaining genes of the pair
DMeth_LUAD_T_NM_hm450 <- DMeth_LUAD_T_NM_hm450 %>%
  mutate(Gene = str_remove(Gene, "_[12]$"))
# Keep the genes with FDR<0.05
fdrThres <- 0.05
DMeth_LUAD_T_NM_hm450_sig <- DMeth_LUAD_T_NM_hm450[DMeth_LUAD_T_NM_hm450$FDR < fdrThres, ]
nrow(DMeth_LUAD_T_NM_hm450_sig) # 9868 (2024/11/08 PM)
## [1] 9868
# Save the significant differentially methylated genes.
# Change date suffix as appropriate if modifications are made
write.table(DMeth_LUAD_T_NM_hm450_sig, "../2_Outputs/DMeth_LUAD_T_NM_hm450_sig_20241108_PM.txt", sep = '\t')
# Volcano plot of the methylation results (all tested genes; FDR on the y axis).
log2FC_cutoff3 <- 0.3
# NOTE(review): this re-uses the name `v3`, which also holds the DESeq2 volcano plot
# created earlier in this script; the earlier plot object is overwritten here.
v3 <- EnhancedVolcano::EnhancedVolcano(
  toptable = DMeth_LUAD_T_NM_hm450,
  lab = DMeth_LUAD_T_NM_hm450$Gene,
  x = "log2foldChange",
  y = "FDR",
  # pCutoffCol = 'min_smoothed_fdr',
  xlab = "log2FC",
  ylab = "-log10(FDR)",
  title = "TM DMGs",
  subtitle = paste0("log2FC cutoff: ", log2FC_cutoff3),
  # Caption counts genes passing both FDR < 0.05 (already applied in *_sig) and
  # the log2FC cutoff. These are methylation results, hence "DMGs" rather than "DEGs".
  caption = paste0(
    "Total = ",
    nrow(DMeth_LUAD_T_NM_hm450_sig[abs(DMeth_LUAD_T_NM_hm450_sig$log2foldChange) > log2FC_cutoff3, ]),
    " significant DMGs above log2FC cutoff"
  ),
  col = c("grey30", "mediumpurple2", "royalblue", "orange2"),
  legendPosition = "bottom",
  labSize = 3,
  max.overlaps = 10,
  drawConnectors = TRUE,
  arrowheads = FALSE,
  pCutoff = 0.05,
  FCcutoff = log2FC_cutoff3,
  gridlines.minor = FALSE,
  gridlines.major = FALSE,
  xlim = c(-2, 4),
  ylim = c(0, 10)
)
v3
## Warning: ggrepel: 3947 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# load series and platform data from GEO
gset <- getGEO("GSE7895", GSEMatrix =TRUE, AnnotGPL=TRUE)
## Found 1 file(s)
## GSE7895_series_matrix.txt.gz
# If several platforms are returned, pick the GPL96 one; otherwise take the only entry
if (length(gset) > 1) idx <- grep("GPL96", attr(gset, "names")) else idx <- 1
gset <- gset[[idx]]
# make proper column names to match toptable
fvarLabels(gset) <- make.names(fvarLabels(gset))
# group membership for all samples
# One character per sample, in sample order: 21 x "2", then 52 x "0", then 31 x "1" (104 samples)
gsms <- paste0("22222222222222222222200000000000000000000000000000",
"00000000000000000000000111111111111111111111111111",
"1111")
sml <- strsplit(gsms, split="")[[1]]
gset <- gset[complete.cases(exprs(gset)), ] # skip missing values (drops probes with any NA)
# assign samples to groups and set up design matrix
# Factor levels sort as "0", "1", "2", so they are relabelled in that order:
# 0 = current_smoker, 1 = former_smoker, 2 = never_smoker
gs <- factor(sml)
groups <- make.names(c("current_smoker","former_smoker","never_smoker"))
levels(gs) <- groups
gset$group <- gs
# Design without intercept: one column per smoking group
design <- model.matrix(~group + 0, gset)
colnames(design) <- levels(gs)
gset <- gset[complete.cases(exprs(gset)), ] # skip missing values (repeats the filter applied above)
## Make histograms and boxplots to check if the data is log-transformed and needs quantile normalization ##
hist(as.matrix(exprs(gset))) # Values range 1-15, and 1 big peak around 3.
boxplot(exprs(gset)) # Same range of values, with similar-looking ranges, but not exactly the same
# Narrow range, therefore no log2 normalization needed
# Between-array normalization with the function's default method (overwrites the expression values in gset)
exprs(gset) <- normalizeBetweenArrays(exprs(gset))
boxplot(exprs(gset)) # Re-check the per-sample distributions after normalization
# 2024/11/12: I elected to do quantile normalization because this gave me a larger list of "persistent" genes. Could justify that it "better captures the variation between groups" etc
# Range of the normalized values
min(exprs(gset))
## [1] -0.2140344
max(exprs(gset))
## [1] 14.59784
## Plot PCA ##
# Color code per sample = integer code of its group level
# (1 = current_smoker, 2 = former_smoker, 3 = never_smoker)
colz <- as.numeric(as.factor(gs)) # Get color values from group
plotMDS(exprs(gset),
        gene.selection = "common",
        main = "PCA for GSE7895",
        col = colz,
        pch = 1
        #labels = gs
)
# Legend colors must follow the level order of `gs`, because the labels do.
# unique(colz) would list the colors in order of first appearance among the
# samples (never, current, former) and pair them with the wrong labels.
legend("topright", legend = levels(as.factor(gs)),
       fill = seq_along(levels(as.factor(gs))),
       title = "Smoking status")
# No separation, all mixed up. This isn't a good look.
# Build a per-sample phenotype table (age, pack-years, time since quitting, group indicators)
library(stringr)
phenotypic_data <- pData(gset) # Extract phenotypic data
# List of column names I want to keep and clean up into usable labels
columns_to_find <- c("characteristics_ch1.1", "group")
# Get the column indexes
indexes <- sapply(columns_to_find, function(col_name) which(names(phenotypic_data) == col_name))
indexes <- unlist(indexes)
phenotypic_data <- phenotypic_data[,c(indexes)]
# The three extractions below take the run of digits that directly follows each label
# in characteristics_ch1.1; a sample without that label (or without digits after it) gets NA.
# NOTE(review): only whole digits are captured, so a value such as "12.5" would be read as 12 - confirm
# against the raw characteristics strings.
# Extract Age
phenotypic_data$age <- as.numeric(str_extract(phenotypic_data$characteristics_ch1.1, "(?<=Age:)\\d+"))
# Extract Packyears
phenotypic_data$packyears <- as.numeric(str_extract(phenotypic_data$characteristics_ch1.1, "(?<=Packyears:)\\d+"))
# Extract Time Since Quit Smoking (months)
phenotypic_data$TSQ_months <- as.numeric(str_extract(phenotypic_data$characteristics_ch1.1, "(?<=Time Since Quit Smoking \\(months\\):)\\d+"))
# Delete the original column with the unseparated info
phenotypic_data <- phenotypic_data[,-1]
# Convert the NA values for packyears for never smokers to zero (this makes sense since the never smokers have 0 pack years)
phenotypic_data$packyears[phenotypic_data$group=="never_smoker"] <- 0
# Convert the NA values for TSQ_months to zero for current smokers (again makes sense)
phenotypic_data$TSQ_months[phenotypic_data$group=="current_smoker"] <- 0
# The three columns below are 0/1 indicator factors, one per smoking group
# Make column to denote just former smoking status for the linear model
phenotypic_data$former_smoking_status <- as.factor(as.numeric(phenotypic_data$group == "former_smoker"))
# Make column to denote just current smoking status for the linear model
phenotypic_data$current_smoking_status <- as.factor(as.numeric(phenotypic_data$group == "current_smoker"))
# Make column to denote just never smoking status for the linear model
phenotypic_data$never_smoking_status <- as.factor(as.numeric(phenotypic_data$group == "never_smoker"))
## Plot PCA using age to define color
# Create a gradient color palette (light blue to dark blue)
# `palette` is a function: palette(n) returns n colors along the gradient.
# Note that this name masks grDevices::palette() for the rest of the script.
palette <- colorRampPalette(c("lightblue", "darkblue"))
## Plot PCA of age ##
# One gradient step per sample; each sample gets the color at the rank of its age
colz_age <- palette(length(phenotypic_data$age))[rank(phenotypic_data$age)] # Map ages to gradient colors
plotMDS(exprs(gset),
gene.selection = "common",
main = "PCA for GSE7895 (darker blue ~ higher age)",
col = colz_age,
pch = 1
)
# Add a color bar for age
# (two entries only: the minimum and maximum age with the two ends of the gradient)
legend("topright", legend = range(phenotypic_data$age),
fill = palette(2),
title = "Age")
# Does not seem to be an age effect
### Plot PCA of packyears ###
# Excluding packyears of zero (never smokers).
# which() also leaves out samples whose packyears value is NA; a plain logical
# subset would return all-NA rows for them, whose row names are not sample IDs.
pheno_packyears <- phenotypic_data[which(phenotypic_data$packyears != 0), ]
# Expression columns for exactly those samples (all_of() errors on a missing sample ID)
exprs_packyears <- as.data.frame(exprs(gset)) %>%
  dplyr::select(all_of(rownames(pheno_packyears)))
# One gradient step per sample; each sample gets the color at the rank of its packyears
colz_packyears <- palette(nrow(pheno_packyears))[rank(pheno_packyears$packyears)] # Map packyears to gradient colors
plotMDS(exprs_packyears,
        gene.selection = "common",
        main = "PCA for GSE7895 (darker blue ~ higher packyears)",
        col = colz_packyears,
        pch = 1
        #labels = gs
)
# Add a color bar for packyears (minimum and maximum with the two ends of the gradient)
legend("topright", legend = range(pheno_packyears$packyears),
       fill = palette(2),
       title = "Packyears")
## Does not seem to be packyears effect
### Plot PCA of time since quitting ###
# Samples with a known time since quitting (current smokers were set to 0 above)
pheno_tsq <- phenotypic_data[!is.na(phenotypic_data$TSQ_months), ]
# Expression columns for exactly those samples (all_of() errors on a missing sample ID)
exprs_tsq <- as.data.frame(exprs(gset)) %>%
  dplyr::select(all_of(rownames(pheno_tsq)))
# The column is referred to by its full name: `$TSQ` only resolves through
# partial matching of `$` on data frames, which fails silently on a tibble or
# as soon as a second column starting with "TSQ" exists.
colz_TSQ <- palette(nrow(pheno_tsq))[rank(pheno_tsq$TSQ_months)] # Map time since quitting to gradient colors
plotMDS(exprs_tsq,
        gene.selection = "common",
        main = "PCA for GSE7895 (darker blue ~ more time since quitting)",
        col = colz_TSQ,
        pch = 1
        #labels = gs
)
# Color bar: minimum and maximum with the two ends of the gradient
legend("topright", legend = range(pheno_tsq$TSQ_months),
       fill = palette(2),
       title = "TSQ")
## Does not seem to be TSQ effect
This is potentially problematic, but I propose that if the genes determined to be “persistent” can differentiate between the groups as expected in PCA, it will be evidence that the results are valid despite the groups not being differentiated by all the genes taken as a whole.
Note that I began trying to do this analysis accounting for pack years and TSQ (see other script), but for now I am just looking at the smoking status comparisons alone.
# Estimate the mean-variance trend and observation weights for the three-group design
# (TRUE is spelled out: `T` is an ordinary variable that can be reassigned)
v <- vooma(gset, design, plot = TRUE)
v$genes <- fData(gset) # attach gene annotations
# fit linear model (one coefficient per smoking group)
fit <- lmFit(v)
# set up contrasts of interest and recalculate model coefficients
#cts <- c(paste(groups[1],"-",groups[2],sep=""), paste(groups[1],"-",groups[3],sep=""), paste(groups[2],"-",groups[3],sep=""))
#cont.matrix <- makeContrasts(contrasts=cts, levels=design)
# Three pairwise comparisons: current vs never, former vs never, current vs former
cont.matrix <- makeContrasts(
  CS_vs_NS = current_smoker - never_smoker,
  FS_vs_NS = former_smoker - never_smoker,
  CS_vs_FS = current_smoker - former_smoker,
  levels = design
)
fit2 <- contrasts.fit(fit, cont.matrix)
# compute statistics and table of top significant genes
fit2 <- eBayes(fit2, proportion = 0.01) # Proportion is "assumed proportion of genes which are differentially expressed"
library(dplyr)
library(VennDiagram)
## Separate out genes that are DEGS in CS vs NS and FS vs NS
## Note: I have decided not to filter out genes that are significantly different between CS and FS because I realized that doesn't make logical sense.
# summarize test results per contrast as 1 (up), -1 (down) or 0 (not significant), at FDR < 0.05 with no fold-change threshold
dT <- decideTests(fit2, adjust.method="fdr", p.value=0.05, lfc=0)
# Venn diagram of results
vennDiagram(dT)
# Select the genes differentially expressed in both CS_vs_NS and FS_vs_NS
dT_persistent <- dT %>%
as.data.frame(.) %>%
filter(CS_vs_NS != 0) %>% # Differentially expressed in CS vs NS
filter(CS_vs_NS == FS_vs_NS)# Also differentially expressed in FS vs NS, with the same sign as in CS vs NS
nrow(dT_persistent) # 128 genes indeed
## [1] 128
# Get the toptable format for all genes
# No single coefficient is selected, so the table has one column per contrast plus an overall
# F statistic; P.Value / adj.P.Val therefore refer to the test across all three contrasts.
tT <- topTable(fit2, adjust="fdr", sort.by="B", number=Inf) # number=Inf returns every probe, not only the top ones
# Filter to the "persistent" genes
# (row names of dT_persistent are matched against the probe ID column)
tT_persistent <- tT %>%
filter(ID %in% rownames(dT_persistent))
# Filter out blanks, keep lower FDR of ties
tT_persistent <- tT_persistent %>%
filter(Gene.symbol != "") %>% # Remove blank gene symbols
filter(adj.P.Val <= 0.05) %>% # Remove FDR > 0.05 genes
group_by(Gene.symbol) %>%
slice_min(adj.P.Val, with_ties = TRUE) %>%
# For probesets mapping to same gene, keep one with lowest FDR. Keep ties for now to check later.
ungroup()
nrow(tT_persistent)
## [1] 116
# Checking for ties
# (gene symbols that still have more than one probe after the lowest-FDR selection)
ties <- tT_persistent%>%
group_by(Gene.symbol) %>%
filter(n() > 1) %>%
ungroup()
print(ties)
## # A tibble: 2 × 28
## ID Gene.title Gene.symbol Gene.ID UniGene.title UniGene.symbol UniGene.ID
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 214303… mucin 5AC… MUC5AC 4586 "" "" ""
## 2 214385… mucin 5AC… MUC5AC 4586 "" "" ""
## # ℹ 21 more variables: Nucleotide.Title <chr>, GI <int>,
## # GenBank.Accession <chr>, Platform_CLONEID <lgl>, Platform_ORF <lgl>,
## # Platform_SPOTID <chr>, Chromosome.location <chr>,
## # Chromosome.annotation <chr>, GO.Function <chr>, GO.Process <chr>,
## # GO.Component <chr>, GO.Function.ID <chr>, GO.Process.ID <chr>,
## # GO.Component.ID <chr>, CS_vs_NS <dbl>, FS_vs_NS <dbl>, CS_vs_FS <dbl>,
## # AveExpr <dbl>, F <dbl>, P.Value <dbl>, adj.P.Val <dbl>
# As there is a tie for MUC5AC, I will remove the MUC5AC probe with an "x" label (214303_x_at) for cross-reactivity
tT_persistent <- tT_persistent %>% filter (ID != "214303_x_at")
#Pick the columns we care about
# (gene symbol, the three contrast estimates, and the FDR; renamed with an "_A2" suffix for this dataset)
GSE7895_persistent_DEGs <- tT_persistent %>%
dplyr::select(., Gene.symbol, CS_vs_NS, FS_vs_NS, CS_vs_FS, adj.P.Val) %>%
dplyr::rename(., Gene = Gene.symbol, CS_NS_A2 = CS_vs_NS, FS_NS_A2 = FS_vs_NS, CS_FS_A2 = CS_vs_FS, FDR_A2 = adj.P.Val)
# Save output (write.table defaults: space-separated, with row names)
write.table(GSE7895_persistent_DEGs, "../2_Outputs/1_Airway_DEGs/GSE7895_persistent_DEGs_20241127.txt")
## Filter exprs to the "persistent" genes
exprs_persistent <- as.data.frame(exprs(gset)) %>%
  filter(rownames(.) %in% tT_persistent$ID)

## Plot PCA ##
# Each sample is coloured by the integer code of its group level.
gs_factor <- as.factor(gs)
colz <- as.numeric(gs_factor)
plotMDS(exprs_persistent,
  gene.selection = "common",
  main = "PCA for GSE7895 with persistent genes",
  col = colz,
  pch = 1
)
# The legend colours are indexed by level position (1, 2, ...), so entry i
# always shows the colour code used for level i. Using unique(colz) instead
# would list colours in order of first appearance among the samples, which
# only matches levels() when samples happen to be sorted by level.
legend("topright",
  legend = levels(gs_factor),
  fill = seq_along(levels(gs_factor)),
  title = "Smoking status"
)
# You can see more separation happening, but I would expect to see current and former smokers more mixed together, whereas we see former and never smokers more mixed together. Hmm okay, interesting at least.
# Might be good to check on the age, packyears and TSQ here as well?
## Plot PCA of age ##
# NOTE(review): `palette` is presumably a colour-ramp function defined
# earlier in the file (it is called with a number of colours) -- confirm.
# Each sample takes the gradient colour at the position given by the rank
# of its age.
age_gradient <- palette(length(phenotypic_data$age))
colz_age <- age_gradient[rank(phenotypic_data$age)]
plotMDS(
  exprs_persistent,
  gene.selection = "common",
  main = "PCA for GSE7895 (darker blue ~ higher age)",
  col = colz_age,
  pch = 16
)
# Legend: youngest and oldest age next to a two-colour version of the ramp
legend(
  "topright",
  legend = range(phenotypic_data$age),
  fill = palette(2),
  title = "Age"
)
# Does not seem to be an age effect
### Plot PCA of packyears ###
# Excluding packyears of zero (never smokers): keep only the sample columns
# named by the rows of pheno_packyears.
exprs_persistent_packyears <- as.data.frame(exprs_persistent) %>%
  dplyr::select(dplyr::all_of(rownames(pheno_packyears)))

# Each sample takes the gradient colour at the rank of its packyears value.
# NOTE(review): `palette` is presumably a colour-ramp function defined
# earlier in the file -- confirm.
packyears_gradient <- palette(length(pheno_packyears$packyears))
colz_packyears <- packyears_gradient[rank(pheno_packyears$packyears)]
plotMDS(
  exprs_persistent_packyears,
  gene.selection = "common",
  main = "PCA for GSE7895 persistent genes (darker blue ~ higher packyears)",
  col = colz_packyears,
  pch = 16
)
# Legend: lowest and highest packyears next to a two-colour ramp
legend(
  "bottomleft",
  legend = range(pheno_packyears$packyears),
  fill = palette(2),
  title = "Packyears"
)
## Maybe some sort of packyears effect happening, not obviously so
### Plot PCA of time since quitting ###
# Keep only the sample columns named by the rows of pheno_tsq.
exprs_persistent_tsq <- as.data.frame(exprs_persistent) %>%
  dplyr::select(dplyr::all_of(rownames(pheno_tsq)))

# Each sample takes the gradient colour at the rank of its TSQ value.
# NOTE(review): `palette` is presumably a colour-ramp function defined
# earlier in the file -- confirm.
tsq_gradient <- palette(length(pheno_tsq$TSQ))
colz_TSQ <- tsq_gradient[rank(pheno_tsq$TSQ)]
plotMDS(
  exprs_persistent_tsq,
  gene.selection = "common",
  main = "PCA for GSE7895 persistent genes (darker blue ~ more months since quitting)",
  col = colz_TSQ,
  pch = 16
)
# Legend: shortest and longest time since quitting next to a two-colour ramp
legend(
  "bottomleft",
  legend = range(pheno_tsq$TSQ),
  fill = palette(2),
  title = "TSQ"
)
## Maybe some TSQ effect but not super obvious
# Reload the saved significant A1 (airway) DEG table and the tumor
# methylation table from disk.
GSE63127_CS_NS_GEO2R_limma_sig <- read.table(
  "../2_Outputs/1_Airway_DEGs/GSE63127_CS_NS_GEO2R_limma_sig_20241115.txt",
  header = TRUE
)
DMeth_LUAD_T_NM_hm450_sig <- read.table(
  "../2_Outputs/5_Tumor_DMGs/DMeth_LUAD_T_NM_hm450_sig_20241108_PM.txt"
)
### Testing log2FC cutoffs ###
# For each table, count the rows whose absolute log2FC exceeds the cutoff.
# A1 DEGs
log2Thres_A1 <- 0
nrow(
  GSE63127_CS_NS_GEO2R_limma_sig[
    abs(GSE63127_CS_NS_GEO2R_limma_sig$logFC) > log2Thres_A1,
  ]
)
## [1] 7105
# TCGA-LUAD DEGs
log2Thres_TE <- 0
nrow(
  DGE_LUAD_T_NM_signed_rank_sig[
    abs(DGE_LUAD_T_NM_signed_rank_sig$log2foldChange) > log2Thres_TE,
  ]
)
## [1] 13465
# TCGA-LUAD DMGs
log2Thres_TM <- 0
nrow(
  DMeth_LUAD_T_NM_hm450_sig[
    abs(DMeth_LUAD_T_NM_hm450_sig$log2foldChange) > log2Thres_TM,
  ]
)
## [1] 9868
## Notes on number of linked genes after filtering:
# 0, 0, 0 => 950 genes
# 0.2, 1, 0.3 => 143 genes
# 0.5, 2, 0.5 => 14 genes
# 0.5, 1, 0.5 => 26 genes (but still good correlation across board unlike with other options)
### Applying log2FC cutoffs and formatting consistently for merge ###
# Every table is cut at its own |log2FC| threshold and reduced to three
# columns: Gene, FDR_<suffix> and log2FC_<suffix>.

# A1: airway current vs never smokers
GSE63127_CS_NS_GEO2R_limma_sig_cutoff <- GSE63127_CS_NS_GEO2R_limma_sig %>%
  filter(abs(logFC) > log2Thres_A1) %>%
  dplyr::select(Gene = Gene.symbol, FDR_A1 = adj.P.Val, log2FC_A1 = logFC)

# TE: tumor expression (signed-rank list)
DGE_LUAD_T_NM_signed_rank_sig_cutoff <- DGE_LUAD_T_NM_signed_rank_sig %>%
  filter(abs(log2foldChange) > log2Thres_TE) %>%
  dplyr::select(Gene, FDR_TE = FDR, log2FC_TE = log2foldChange)

# TM: tumor methylation
DMeth_LUAD_T_NM_hm450_sig_cutoff <- DMeth_LUAD_T_NM_hm450_sig %>%
  filter(abs(log2foldChange) > log2Thres_TM) %>%
  dplyr::select(Gene, FDR_TM = FDR, log2FC_TM = log2foldChange)

## 2024/11/27 -- trying out the deseq2 T-E list for comparison
# Same TE cutoff and column names as the signed-rank list above
DGE_LUAD_T_NM_DESeq2_sig_cutoff <- DGE_LUAD_T_NM_DESeq2_sig %>%
  filter(abs(log2FC) > log2Thres_TE) %>%
  dplyr::select(Gene = gene, FDR_TE = FDR, log2FC_TE = log2FC)
## Merge the lists
# A1_TE_merged_DEGs <- GSE63127_CS_NS_GEO2R_limma_sig_cutoff %>%
# inner_join(., DGE_LUAD_T_NM_signed_rank_sig_cutoff, by = "Gene") %>%
# filter(sign(log2FC_A1)==sign(log2FC_TE)) # Filter to genes with same signs
#
# nrow(A1_TE_merged_DEGs)
# 2024/11/27: Using the DESeq2 list instead
# Genes present in both the A1 and the tumor-expression (DESeq2) tables,
# restricted to those whose log2FC has the same sign in both.
A1_TE_merged_DEGs <- inner_join(
  GSE63127_CS_NS_GEO2R_limma_sig_cutoff,
  DGE_LUAD_T_NM_DESeq2_sig_cutoff,
  by = "Gene"
) %>%
  filter(sign(log2FC_A1) == sign(log2FC_TE))
nrow(A1_TE_merged_DEGs)
## [1] 2264
## Merge the lists
# Add the tumor-methylation table and keep genes whose methylation log2FC
# sign differs from the A1 expression log2FC sign.
A1_TE_TM_linked_genes <- inner_join(
  A1_TE_merged_DEGs,
  DMeth_LUAD_T_NM_hm450_sig_cutoff,
  by = "Gene"
) %>%
  filter(sign(log2FC_A1) != sign(log2FC_TM))
nrow(A1_TE_TM_linked_genes)
## [1] 944
Note: Maybe this should incorporate a Spearman correlation filter? I think that would require pairing samples across the tumor datasets at the expression/M-value level. The log2FC values give an idea of the broader correlation, but applying a correlation filter properly would need the original sample-level tables instead.
## Visualizing Spearman correlations
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Pairwise panels for the three log2FC columns: scatter plots below the
# diagonal, densities on it, Spearman correlations above it.
ggpairs(
  A1_TE_TM_linked_genes[, c("log2FC_A1", "log2FC_TE", "log2FC_TM")],
  lower = list(
    continuous = "points",
    combo = "facethist",
    discrete = "facetbar",
    na = "na"
  ),
  diag = list(
    continuous = "densityDiag",
    discrete = "barDiag",
    na = "naDiag"
  ),
  upper = list(continuous = wrap("cor", method = "spearman"))
)

# Multiple linear regression of the airway log2FC on the tumor expression
# and tumor methylation log2FC of the linked genes.
mlr_airway_model <- lm(log2FC_A1 ~ log2FC_TE + log2FC_TM, data = A1_TE_TM_linked_genes)
summary(mlr_airway_model)
##
## Call:
## lm(formula = log2FC_A1 ~ log2FC_TE + log2FC_TM, data = A1_TE_TM_linked_genes)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.52245 -0.12802 -0.02276 0.09615 1.96406
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.048916 0.009493 5.153 3.12e-07 ***
## log2FC_TE 0.200719 0.008084 24.830 < 2e-16 ***
## log2FC_TM -0.222290 0.021508 -10.335 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2818 on 941 degrees of freedom
## Multiple R-squared: 0.6143, Adjusted R-squared: 0.6135
## F-statistic: 749.3 on 2 and 941 DF, p-value: < 2.2e-16
library(scatterplot3d)
# 3D scatter of the linked genes with the fitted regression plane.
# The axes follow the model log2FC_A1 ~ log2FC_TE + log2FC_TM: plane3d()
# reads an lm fit as z ~ x + y, so the first predictor (TE) goes on x, the
# second (TM) on y and the response (A1) on z. This also makes the data
# agree with the axis labels; previously A1/TE/TM were passed as x/y/z
# while the labels said TE/TM/A1.
# The `labels` argument was dropped: scatterplot3d() does not have one, and
# it only produced '"labels" is not a graphical parameter' warnings.
s3d <- scatterplot3d(
  x = A1_TE_TM_linked_genes$log2FC_TE,
  y = A1_TE_TM_linked_genes$log2FC_TM,
  z = A1_TE_TM_linked_genes$log2FC_A1,
  main = "Plotting airway expression vs. tumor expression vs. tumor methylation",
  xlab = "log2FC(TE)",
  ylab = "log2FC(TM)",
  zlab = "log2FC(A1)",
  pch = 19,
  # color = colors_3d, # getting color values from methylation sign
  color = "steelblue",
  type = "h", # vertical drop lines from each point to the base plane
  # highlight.3d = TRUE,
  angle = 60
)
s3d$plane3d(mlr_airway_model) # Add a plane based on the multiple linear regression model
# For now, no cutoffs
# Intersect the linked genes with the A2 persistent DEGs and keep those
# whose CS vs NS change has the same sign in A1 and A2.
A1_TE_TM_A2_persistent_linked_genes <- dplyr::inner_join(
  A1_TE_TM_linked_genes,
  GSE7895_persistent_DEGs,
  by = "Gene"
) %>%
  dplyr::filter(sign(log2FC_A1) == sign(CS_NS_A2))
A1_TE_TM_A2_persistent_linked_genes
## Gene FDR_A1 log2FC_A1 FDR_TE log2FC_TE FDR_TM
## 1 ALDOA 2.566855e-09 0.3770692 1.472728e-19 1.1579009 1.321686e-04
## 2 CA12 8.257875e-09 0.5580391 1.216350e-05 1.1408287 1.036252e-04
## 3 CCDC81 3.903929e-05 -0.3596527 4.544984e-10 -1.7577192 1.337249e-08
## 4 CEACAM5 3.601041e-21 2.4094841 1.052641e-26 4.2113900 2.014160e-04
## 5 CEACAM6 5.102659e-22 0.8945448 4.021570e-12 1.7217246 3.898486e-07
## 6 CTNNAL1 9.011552e-08 -0.2950499 3.863514e-17 -1.2043124 1.261212e-05
## 7 DEFB1 5.131697e-09 0.9223731 1.017661e-02 0.8351949 1.032043e-05
## 8 EFEMP1 1.212608e-05 -0.3657118 1.052726e-19 -1.4834837 6.903960e-08
## 9 EPAS1 2.834268e-12 -0.4954380 1.041468e-90 -2.7396724 1.496168e-05
## 10 FAM189A2 2.483296e-07 -0.2839072 1.861610e-72 -3.0991189 4.821474e-06
## 11 GMDS 4.611052e-10 0.6057314 1.519206e-28 1.4970196 3.393904e-08
## 12 GPX2 1.654570e-45 2.5506583 1.570672e-47 5.9339828 1.643197e-06
## 13 HTATIP2 3.945663e-29 0.9111148 9.266875e-07 0.5942826 2.582063e-07
## 14 LMO2 1.571661e-03 -0.3886250 5.096911e-49 -1.7873761 2.616719e-09
## 15 MBOAT7 2.914710e-12 0.5137116 2.262034e-05 0.4549056 1.643197e-06
## 16 N4BP2L1 1.356102e-06 -0.3125984 9.495642e-18 -1.1038533 4.206529e-08
## 17 NQO1 5.660885e-42 1.5172869 5.694570e-34 3.2010657 2.561175e-09
## 18 PTPRM 1.154749e-07 -0.3737652 2.245852e-18 -1.2072743 5.703237e-08
## 19 PYGB 1.305714e-02 0.3756975 6.681797e-03 0.3343992 3.084027e-09
## 20 RERE 7.996695e-04 -0.1771604 2.536276e-03 -0.2742240 2.812878e-05
## 21 S100P 3.017522e-10 0.6501092 4.170991e-40 4.8622959 1.705580e-06
## 22 SERPINB5 3.053668e-02 0.3601096 1.449405e-26 4.5919694 2.561175e-09
## 23 TALDO1 7.040113e-31 0.8134359 1.270123e-04 0.6705743 6.500921e-05
## 24 TLE1 1.734041e-12 0.5240967 1.366377e-03 0.3583536 1.049490e-03
## 25 TMPRSS4 2.289513e-07 0.4506248 2.938960e-68 4.8578293 1.358720e-07
## 26 TNS1 1.227499e-04 -0.2749736 1.376634e-64 -2.1624629 2.800355e-08
## 27 TXN 3.094024e-14 0.4572731 7.159367e-07 0.8241842 3.858088e-09
## log2FC_TM CS_NS_A2 FS_NS_A2 CS_FS_A2 FDR_A2
## 1 -0.16059240 0.4032802 0.3407331 0.062547093 2.776254e-03
## 2 -0.14627531 1.5430724 1.2437664 0.299306037 2.254072e-05
## 3 0.78081385 -1.2597645 -1.2047266 -0.055037898 3.951014e-03
## 4 -0.12273499 3.0351572 2.2437012 0.791456072 1.351057e-08
## 5 -0.23969715 0.9338987 0.6059897 0.327909015 5.320093e-06
## 6 0.08931844 -1.2184931 -1.0133587 -0.205134470 1.135857e-03
## 7 -0.18090111 1.9923476 1.5363189 0.456028674 3.839433e-05
## 8 0.51869617 -0.8270279 -0.9278730 0.100845102 7.527296e-03
## 9 0.27037980 -0.7916370 -0.7853296 -0.006307385 9.672029e-05
## 10 0.20654108 -0.2321193 -0.2270856 -0.005033630 1.191916e-02
## 11 -0.26437554 0.7874442 0.6092521 0.178192117 1.127664e-04
## 12 -0.33166918 2.9950716 1.5416876 1.453383958 8.082128e-17
## 13 -0.28846048 1.1208826 0.5714055 0.549477174 3.952368e-12
## 14 2.40759033 -0.6292862 -0.5040922 -0.125194010 1.146276e-03
## 15 -0.22613719 0.4638591 0.5691029 -0.105243813 1.670886e-02
## 16 0.62487237 -0.4475380 -0.4545522 0.007014150 1.620992e-03
## 17 -0.57757388 2.0600457 0.9921077 1.067938017 8.749026e-18
## 18 0.56915956 -0.2356382 -0.2915727 0.055934474 2.009266e-02
## 19 -0.27504588 0.2738300 0.3489422 -0.075112202 2.863405e-02
## 20 0.37479080 -0.3033106 -0.4886218 0.185311261 2.502948e-03
## 21 -0.29162868 0.8341189 0.6625807 0.171538228 7.117128e-05
## 22 -0.53839643 0.9229824 1.0296751 -0.106692664 2.566330e-02
## 23 -0.08999397 1.2640972 0.6498642 0.614233002 1.138555e-12
## 24 -0.15290994 0.6730828 0.5935444 0.079538406 2.353419e-04
## 25 -0.35746941 0.8303343 0.7097247 0.120609563 9.792030e-05
## 26 1.21544680 -0.7633155 -0.6635958 -0.099719752 1.752972e-04
## 27 -0.34520577 1.1010514 0.6418392 0.459212192 3.161799e-08
## Recording results based on cutoffs (when I was using the signed-rank test for TE - now I use DESeq2)
## 0.5,1,0.5,0 => 1 (NQO1)
## 0,0,0,0 => 27
## 0.2, 1, 0.3 => 10
# Spearman rank correlation between the A1 and A2 current-vs-never smoker
# log2 fold changes of the persistent linked genes.
# `use = "everything"` was removed: it is an argument of cor(), not of
# cor.test(), where it was silently absorbed by `...` and had no effect.
cor.test(A1_TE_TM_A2_persistent_linked_genes$log2FC_A1,
  y = A1_TE_TM_A2_persistent_linked_genes$CS_NS_A2,
  method = "spearman"
)
##
## Spearman's rank correlation rho
##
## data: A1_TE_TM_A2_persistent_linked_genes$log2FC_A1 and A1_TE_TM_A2_persistent_linked_genes$CS_NS_A2
## S = 292, p-value = 9.054e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9108669
# Scatter plot of A1 vs A2 current-vs-never smoker log2 fold changes for the
# persistent linked genes, with a linear trend line.
# An earlier draft of this call closed ggplot() after the layers, so the
# geoms were passed as an argument instead of being added with `+` and were
# never drawn. Only the working version is kept.
ggplot(
  A1_TE_TM_A2_persistent_linked_genes,
  aes(x = log2FC_A1, y = CS_NS_A2)
) +
  geom_point(color = "blue", size = 3) + # Scatter points
  geom_smooth(method = "lm", se = FALSE, color = "black") + # Trend line
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
2024/11/27: Here is a highly significant interaction network I obtained when running the analysis with the DESeq2 TE list and no cutoffs: https://version-12-0.string-db.org/cgi/network?networkId=baZv6RvUQLCS
# write.table(A1_TE_TM_linked_genes, "../2_Outputs/A1_TE_TM_linked_genes_nocutoffs_20241112.txt", sep = '\t')
#write.table(A1_TE_TM_A2_persistent_linked_genes, "../2_Outputs/A1_TE_TM_A2_persistent_linked_genes_DESeq2_nocutoffs_20241128.txt", sep = '\t')